diff --git a/.codespell/requirements.txt b/.codespell/requirements.txt index 407f17489c6..ddff454685c 100644 --- a/.codespell/requirements.txt +++ b/.codespell/requirements.txt @@ -1 +1 @@ -codespell==2.2.4 +codespell==2.2.5 diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 43547a431af..3778d032623 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -17,7 +17,7 @@ jobs: sudo apt-get install tcl8.6 tclx ./runtest --verbose --tags -slow --dump-logs - name: module api test - run: ./runtest-moduleapi --verbose --dump-logs + run: CFLAGS='-Werror' ./runtest-moduleapi --verbose --dump-logs - name: validate commands.def up to date run: | touch src/commands/ping.json @@ -31,13 +31,13 @@ jobs: - uses: actions/checkout@v3 - name: make # build with TLS module just for compilation coverage - run: make SANITIZER=address REDIS_CFLAGS='-Werror' BUILD_TLS=module + run: make SANITIZER=address REDIS_CFLAGS='-Werror -DDEBUG_ASSERTIONS' BUILD_TLS=module - name: testprep run: sudo apt-get install tcl8.6 tclx -y - name: test run: ./runtest --verbose --tags -slow --dump-logs - name: module api test - run: ./runtest-moduleapi --verbose --dump-logs + run: CFLAGS='-Werror' ./runtest-moduleapi --verbose --dump-logs build-debian-old: runs-on: ubuntu-latest diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml index dc7413e59c4..fc92dec2182 100644 --- a/.github/workflows/codeql-analysis.yml +++ b/.github/workflows/codeql-analysis.yml @@ -22,12 +22,12 @@ jobs: uses: actions/checkout@v3 - name: Initialize CodeQL - uses: github/codeql-action/init@v2 + uses: github/codeql-action/init@v3 with: languages: ${{ matrix.language }} - name: Autobuild - uses: github/codeql-action/autobuild@v2 + uses: github/codeql-action/autobuild@v3 - name: Perform CodeQL Analysis - uses: github/codeql-action/analyze@v2 + uses: github/codeql-action/analyze@v3 diff --git a/.github/workflows/coverity.yml b/.github/workflows/coverity.yml new file 
mode 100644 index 00000000000..0237c8739fb --- /dev/null +++ b/.github/workflows/coverity.yml @@ -0,0 +1,32 @@ +# Creates and uploads a Coverity build on a schedule +name: Coverity Scan +on: + schedule: + # Run once daily, since below 500k LOC can have 21 builds per week, per https://scan.coverity.com/faq#frequency + - cron: '0 0 * * *' + # Support manual execution + workflow_dispatch: +jobs: + coverity: + if: github.repository == 'redis/redis' + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@main + - name: Download and extract the Coverity Build Tool + run: | + wget -q https://scan.coverity.com/download/cxx/linux64 --post-data "token=${{ secrets.COVERITY_SCAN_TOKEN }}&project=redis-unstable" -O cov-analysis-linux64.tar.gz + mkdir cov-analysis-linux64 + tar xzf cov-analysis-linux64.tar.gz --strip 1 -C cov-analysis-linux64 + - name: Install Redis dependencies + run: sudo apt install -y gcc tcl8.6 tclx procps libssl-dev + - name: Build with cov-build + run: cov-analysis-linux64/bin/cov-build --dir cov-int make + - name: Upload the result + run: | + tar czvf cov-int.tgz cov-int + curl \ + --form project=redis-unstable \ + --form email=${{ secrets.COVERITY_SCAN_EMAIL }} \ + --form token=${{ secrets.COVERITY_SCAN_TOKEN }} \ + --form file=@cov-int.tgz \ + https://scan.coverity.com/builds diff --git a/.github/workflows/daily.yml b/.github/workflows/daily.yml index 72720e6dd43..8e382ec80a3 100644 --- a/.github/workflows/daily.yml +++ b/.github/workflows/daily.yml @@ -60,7 +60,7 @@ jobs: run: ./runtest --accurate --verbose --dump-logs ${{github.event.inputs.test_args}} - name: module api test if: true && !contains(github.event.inputs.skiptests, 'modules') - run: ./runtest-moduleapi --verbose --dump-logs ${{github.event.inputs.test_args}} + run: CFLAGS='-Werror' ./runtest-moduleapi --verbose --dump-logs ${{github.event.inputs.test_args}} - name: sentinel tests if: true && !contains(github.event.inputs.skiptests, 'sentinel') run: ./runtest-sentinel 
${{github.event.inputs.cluster_test_args}} @@ -104,7 +104,7 @@ jobs: run: ./runtest --accurate --verbose --dump-logs ${{github.event.inputs.test_args}} - name: module api test if: true && !contains(github.event.inputs.skiptests, 'modules') - run: ./runtest-moduleapi --verbose --dump-logs ${{github.event.inputs.test_args}} + run: CFLAGS='-Werror' ./runtest-moduleapi --verbose --dump-logs ${{github.event.inputs.test_args}} - name: sentinel tests if: true && !contains(github.event.inputs.skiptests, 'sentinel') run: ./runtest-sentinel ${{github.event.inputs.cluster_test_args}} @@ -144,7 +144,7 @@ jobs: run: ./runtest --accurate --verbose --dump-logs ${{github.event.inputs.test_args}} - name: module api test if: true && !contains(github.event.inputs.skiptests, 'modules') - run: ./runtest-moduleapi --verbose --dump-logs ${{github.event.inputs.test_args}} + run: CFLAGS='-Werror' ./runtest-moduleapi --verbose --dump-logs ${{github.event.inputs.test_args}} - name: sentinel tests if: true && !contains(github.event.inputs.skiptests, 'sentinel') run: ./runtest-sentinel ${{github.event.inputs.cluster_test_args}} @@ -181,7 +181,7 @@ jobs: run: ./runtest --accurate --verbose --dump-logs ${{github.event.inputs.test_args}} - name: module api test if: true && !contains(github.event.inputs.skiptests, 'modules') - run: ./runtest-moduleapi --verbose --dump-logs ${{github.event.inputs.test_args}} + run: CFLAGS='-Werror' ./runtest-moduleapi --verbose --dump-logs ${{github.event.inputs.test_args}} - name: sentinel tests if: true && !contains(github.event.inputs.skiptests, 'sentinel') run: ./runtest-sentinel ${{github.event.inputs.cluster_test_args}} @@ -222,7 +222,7 @@ jobs: if: true && !contains(github.event.inputs.skiptests, 'modules') run: | make -C tests/modules 32bit # the script below doesn't have an argument, we must build manually ahead of time - ./runtest-moduleapi --verbose --dump-logs ${{github.event.inputs.test_args}} + CFLAGS='-Werror' ./runtest-moduleapi --verbose 
--dump-logs ${{github.event.inputs.test_args}} - name: sentinel tests if: true && !contains(github.event.inputs.skiptests, 'sentinel') run: ./runtest-sentinel ${{github.event.inputs.cluster_test_args}} @@ -267,7 +267,7 @@ jobs: - name: module api test if: true && !contains(github.event.inputs.skiptests, 'modules') run: | - ./runtest-moduleapi --verbose --dump-logs --tls --dump-logs ${{github.event.inputs.test_args}} + CFLAGS='-Werror' ./runtest-moduleapi --verbose --dump-logs --tls --dump-logs ${{github.event.inputs.test_args}} - name: sentinel tests if: true && !contains(github.event.inputs.skiptests, 'sentinel') run: | @@ -311,7 +311,7 @@ jobs: - name: module api test if: true && !contains(github.event.inputs.skiptests, 'modules') run: | - ./runtest-moduleapi --verbose --dump-logs ${{github.event.inputs.test_args}} + CFLAGS='-Werror' ./runtest-moduleapi --verbose --dump-logs ${{github.event.inputs.test_args}} - name: sentinel tests if: true && !contains(github.event.inputs.skiptests, 'sentinel') run: | @@ -489,7 +489,7 @@ jobs: sudo apt-get install tcl8.6 tclx valgrind -y - name: module api test if: true && !contains(github.event.inputs.skiptests, 'modules') - run: ./runtest-moduleapi --valgrind --no-latency --verbose --clients 1 --timeout 2400 --dump-logs ${{github.event.inputs.test_args}} + run: CFLAGS='-Werror' ./runtest-moduleapi --valgrind --no-latency --verbose --clients 1 --timeout 2400 --dump-logs ${{github.event.inputs.test_args}} - name: unittest if: true && !contains(github.event.inputs.skiptests, 'unittest') run: | @@ -554,7 +554,7 @@ jobs: sudo apt-get install tcl8.6 tclx valgrind -y - name: module api test if: true && !contains(github.event.inputs.skiptests, 'modules') - run: ./runtest-moduleapi --valgrind --no-latency --verbose --clients 1 --timeout 2400 --dump-logs ${{github.event.inputs.test_args}} + run: CFLAGS='-Werror' ./runtest-moduleapi --valgrind --no-latency --verbose --clients 1 --timeout 2400 --dump-logs 
${{github.event.inputs.test_args}} - name: unittest if: true && !contains(github.event.inputs.skiptests, 'unittest') run: | @@ -587,7 +587,7 @@ jobs: repository: ${{ env.GITHUB_REPOSITORY }} ref: ${{ env.GITHUB_HEAD_REF }} - name: make - run: make SANITIZER=address REDIS_CFLAGS='-DREDIS_TEST -Werror' + run: make SANITIZER=address REDIS_CFLAGS='-DREDIS_TEST -Werror -DDEBUG_ASSERTIONS' - name: testprep run: | sudo apt-get update @@ -597,7 +597,7 @@ jobs: run: ./runtest --accurate --verbose --dump-logs ${{github.event.inputs.test_args}} - name: module api test if: true && !contains(github.event.inputs.skiptests, 'modules') - run: ./runtest-moduleapi --verbose --dump-logs ${{github.event.inputs.test_args}} + run: CFLAGS='-Werror' ./runtest-moduleapi --verbose --dump-logs ${{github.event.inputs.test_args}} - name: sentinel tests if: true && !contains(github.event.inputs.skiptests, 'sentinel') run: ./runtest-sentinel ${{github.event.inputs.cluster_test_args}} @@ -644,7 +644,7 @@ jobs: run: ./runtest --accurate --verbose --dump-logs ${{github.event.inputs.test_args}} - name: module api test if: true && !contains(github.event.inputs.skiptests, 'modules') - run: ./runtest-moduleapi --verbose --dump-logs ${{github.event.inputs.test_args}} + run: CFLAGS='-Werror' ./runtest-moduleapi --verbose --dump-logs ${{github.event.inputs.test_args}} - name: sentinel tests if: true && !contains(github.event.inputs.skiptests, 'sentinel') run: ./runtest-sentinel ${{github.event.inputs.cluster_test_args}} @@ -687,7 +687,7 @@ jobs: run: ./runtest --accurate --verbose --dump-logs ${{github.event.inputs.test_args}} - name: module api test if: true && !contains(github.event.inputs.skiptests, 'modules') - run: ./runtest-moduleapi --verbose --dump-logs ${{github.event.inputs.test_args}} + run: CFLAGS='-Werror' ./runtest-moduleapi --verbose --dump-logs ${{github.event.inputs.test_args}} - name: sentinel tests if: true && !contains(github.event.inputs.skiptests, 'sentinel') run: ./runtest-sentinel 
${{github.event.inputs.cluster_test_args}} @@ -732,7 +732,7 @@ jobs: - name: module api test if: true && !contains(github.event.inputs.skiptests, 'modules') run: | - ./runtest-moduleapi --verbose --dump-logs --tls-module --dump-logs ${{github.event.inputs.test_args}} + CFLAGS='-Werror' ./runtest-moduleapi --verbose --dump-logs --tls-module --dump-logs ${{github.event.inputs.test_args}} - name: sentinel tests if: true && !contains(github.event.inputs.skiptests, 'sentinel') run: | @@ -779,7 +779,7 @@ jobs: - name: module api test if: true && !contains(github.event.inputs.skiptests, 'modules') run: | - ./runtest-moduleapi --verbose --dump-logs ${{github.event.inputs.test_args}} + CFLAGS='-Werror' ./runtest-moduleapi --verbose --dump-logs ${{github.event.inputs.test_args}} - name: sentinel tests if: true && !contains(github.event.inputs.skiptests, 'sentinel') run: | @@ -813,10 +813,10 @@ jobs: run: make REDIS_CFLAGS='-Werror' - name: test if: true && !contains(github.event.inputs.skiptests, 'redis') - run: ./runtest --accurate --verbose --verbose --clients 1 --no-latency --dump-logs ${{github.event.inputs.test_args}} + run: ./runtest --accurate --verbose --clients 1 --no-latency --dump-logs ${{github.event.inputs.test_args}} - name: module api test if: true && !contains(github.event.inputs.skiptests, 'modules') - run: ./runtest-moduleapi --verbose --verbose --clients 1 --no-latency --dump-logs ${{github.event.inputs.test_args}} + run: CFLAGS='-Werror' ./runtest-moduleapi --verbose --clients 1 --no-latency --dump-logs ${{github.event.inputs.test_args}} test-macos-latest-sentinel: runs-on: macos-latest @@ -870,45 +870,19 @@ jobs: if: true && !contains(github.event.inputs.skiptests, 'cluster') run: ./runtest-cluster ${{github.event.inputs.cluster_test_args}} - test-freebsd: - runs-on: macos-12 + build-macos: + strategy: + matrix: + os: [macos-11, macos-13] + runs-on: ${{ matrix.os }} if: | (github.event_name == 'workflow_dispatch' || (github.event_name != 
'workflow_dispatch' && github.repository == 'redis/redis')) && - !contains(github.event.inputs.skipjobs, 'freebsd') && !(contains(github.event.inputs.skiptests, 'redis') && contains(github.event.inputs.skiptests, 'modules')) + !contains(github.event.inputs.skipjobs, 'macos') timeout-minutes: 14400 steps: - - name: prep - if: github.event_name == 'workflow_dispatch' - run: | - echo "GITHUB_REPOSITORY=${{github.event.inputs.use_repo}}" >> $GITHUB_ENV - echo "GITHUB_HEAD_REF=${{github.event.inputs.use_git_ref}}" >> $GITHUB_ENV - echo "skipjobs: ${{github.event.inputs.skipjobs}}" - echo "skiptests: ${{github.event.inputs.skiptests}}" - echo "test_args: ${{github.event.inputs.test_args}}" - echo "cluster_test_args: ${{github.event.inputs.cluster_test_args}}" - - uses: actions/checkout@v3 - with: - repository: ${{ env.GITHUB_REPOSITORY }} - ref: ${{ env.GITHUB_HEAD_REF }} - - name: test - uses: vmactions/freebsd-vm@v0.3.1 + - uses: maxim-lobanov/setup-xcode@v1 with: - usesh: true - sync: rsync - copyback: false - prepare: pkg install -y bash gmake lang/tcl86 lang/tclx - run: > - gmake || exit 1 ; - if echo "${{github.event.inputs.skiptests}}" | grep -vq redis ; then ./runtest --verbose --timeout 2400 --no-latency --dump-logs ${{github.event.inputs.test_args}} || exit 1 ; fi ; - if echo "${{github.event.inputs.skiptests}}" | grep -vq modules ; then MAKE=gmake ./runtest-moduleapi --verbose --timeout 2400 --no-latency --dump-logs ${{github.event.inputs.test_args}} || exit 1 ; fi ; - - test-freebsd-sentinel: - runs-on: macos-12 - if: | - (github.event_name == 'workflow_dispatch' || (github.event_name != 'workflow_dispatch' && github.repository == 'redis/redis')) && - !contains(github.event.inputs.skipjobs, 'freebsd') && !contains(github.event.inputs.skiptests, 'sentinel') - timeout-minutes: 14400 - steps: + xcode-version: latest - name: prep if: github.event_name == 'workflow_dispatch' run: | @@ -922,22 +896,14 @@ jobs: with: repository: ${{ env.GITHUB_REPOSITORY }} ref: ${{ 
env.GITHUB_HEAD_REF }} - - name: test - uses: vmactions/freebsd-vm@v0.3.1 - with: - usesh: true - sync: rsync - copyback: false - prepare: pkg install -y bash gmake lang/tcl86 lang/tclx - run: > - gmake || exit 1 ; - if echo "${{github.event.inputs.skiptests}}" | grep -vq sentinel ; then ./runtest-sentinel ${{github.event.inputs.cluster_test_args}} || exit 1 ; fi ; + - name: make + run: make REDIS_CFLAGS='-Werror -DREDIS_TEST' - test-freebsd-cluster: + test-freebsd: runs-on: macos-12 if: | (github.event_name == 'workflow_dispatch' || (github.event_name != 'workflow_dispatch' && github.repository == 'redis/redis')) && - !contains(github.event.inputs.skipjobs, 'freebsd') && !contains(github.event.inputs.skiptests, 'cluster') + !contains(github.event.inputs.skipjobs, 'freebsd') timeout-minutes: 14400 steps: - name: prep @@ -945,24 +911,21 @@ jobs: run: | echo "GITHUB_REPOSITORY=${{github.event.inputs.use_repo}}" >> $GITHUB_ENV echo "GITHUB_HEAD_REF=${{github.event.inputs.use_git_ref}}" >> $GITHUB_ENV - echo "skipjobs: ${{github.event.inputs.skipjobs}}" - echo "skiptests: ${{github.event.inputs.skiptests}}" - echo "test_args: ${{github.event.inputs.test_args}}" - echo "cluster_test_args: ${{github.event.inputs.cluster_test_args}}" - uses: actions/checkout@v3 with: repository: ${{ env.GITHUB_REPOSITORY }} ref: ${{ env.GITHUB_HEAD_REF }} - name: test - uses: vmactions/freebsd-vm@v0.3.1 + uses: cross-platform-actions/action@v0.22.0 with: - usesh: true - sync: rsync - copyback: false - prepare: pkg install -y bash gmake lang/tcl86 lang/tclx - run: > - gmake || exit 1 ; - if echo "${{github.event.inputs.skiptests}}" | grep -vq cluster ; then ./runtest-cluster ${{github.event.inputs.cluster_test_args}} || exit 1 ; fi ; + operating_system: freebsd + environment_variables: MAKE + version: 13.2 + shell: bash + run: | + sudo pkg install -y bash gmake lang/tcl86 lang/tclx + gmake + ./runtest --single unit/keyspace --single unit/auth --single unit/networking --single unit/protocol 
test-alpine-jemalloc: runs-on: ubuntu-latest @@ -995,7 +958,7 @@ jobs: run: ./runtest --accurate --verbose --dump-logs ${{github.event.inputs.test_args}} - name: module api test if: true && !contains(github.event.inputs.skiptests, 'modules') - run: ./runtest-moduleapi --verbose --dump-logs ${{github.event.inputs.test_args}} + run: CFLAGS='-Werror' ./runtest-moduleapi --verbose --dump-logs ${{github.event.inputs.test_args}} - name: sentinel tests if: true && !contains(github.event.inputs.skiptests, 'sentinel') run: ./runtest-sentinel ${{github.event.inputs.cluster_test_args}} @@ -1034,7 +997,7 @@ jobs: run: ./runtest --accurate --verbose --dump-logs ${{github.event.inputs.test_args}} - name: module api test if: true && !contains(github.event.inputs.skiptests, 'modules') - run: ./runtest-moduleapi --verbose --dump-logs ${{github.event.inputs.test_args}} + run: CFLAGS='-Werror' ./runtest-moduleapi --verbose --dump-logs ${{github.event.inputs.test_args}} - name: sentinel tests if: true && !contains(github.event.inputs.skiptests, 'sentinel') run: ./runtest-sentinel ${{github.event.inputs.cluster_test_args}} @@ -1071,7 +1034,7 @@ jobs: run: ./runtest --log-req-res --no-latency --dont-clean --force-resp3 --tags -slow --verbose --dump-logs ${{github.event.inputs.test_args}} - name: module api test if: true && !contains(github.event.inputs.skiptests, 'modules') - run: ./runtest-moduleapi --log-req-res --no-latency --dont-clean --force-resp3 --dont-pre-clean --verbose --dump-logs ${{github.event.inputs.test_args}} + run: CFLAGS='-Werror' ./runtest-moduleapi --log-req-res --no-latency --dont-clean --force-resp3 --dont-pre-clean --verbose --dump-logs ${{github.event.inputs.test_args}} - name: sentinel tests if: true && !contains(github.event.inputs.skiptests, 'sentinel') run: ./runtest-sentinel --log-req-res --dont-clean --force-resp3 ${{github.event.inputs.cluster_test_args}} diff --git a/.github/workflows/external.yml b/.github/workflows/external.yml index 
15a9afb6e41..0c884053b76 100644 --- a/.github/workflows/external.yml +++ b/.github/workflows/external.yml @@ -23,6 +23,7 @@ jobs: run: | ./runtest \ --host 127.0.0.1 --port 6379 \ + --verbose \ --tags -slow - name: Archive redis log if: ${{ failure() }} @@ -49,6 +50,7 @@ jobs: run: | ./runtest \ --host 127.0.0.1 --port 6379 \ + --verbose \ --cluster-mode \ --tags -slow - name: Archive redis log @@ -73,6 +75,7 @@ jobs: run: | ./runtest \ --host 127.0.0.1 --port 6379 \ + --verbose \ --tags "-slow -needs:debug" - name: Archive redis log if: ${{ failure() }} diff --git a/.github/workflows/reply-schemas-linter.yml b/.github/workflows/reply-schemas-linter.yml index 13fc8ab88d0..6893bb3dca9 100644 --- a/.github/workflows/reply-schemas-linter.yml +++ b/.github/workflows/reply-schemas-linter.yml @@ -14,7 +14,7 @@ jobs: steps: - uses: actions/checkout@v3 - name: Setup nodejs - uses: actions/setup-node@v3 + uses: actions/setup-node@v4 - name: Install packages run: npm install ajv - name: linter diff --git a/.github/workflows/spell-check.yml b/.github/workflows/spell-check.yml index 53360741291..77f5437ca2b 100644 --- a/.github/workflows/spell-check.yml +++ b/.github/workflows/spell-check.yml @@ -19,7 +19,7 @@ jobs: uses: actions/checkout@v3 - name: pip cache - uses: actions/cache@v3 + uses: actions/cache@v4 with: path: ~/.cache/pip key: ${{ runner.os }}-pip-${{ hashFiles('**/requirements.txt') }} diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md index d66769b984e..648a4926856 100644 --- a/CODE_OF_CONDUCT.md +++ b/CODE_OF_CONDUCT.md @@ -26,7 +26,7 @@ Examples of unacceptable behavior include: advances of any kind * Trolling, insulting or derogatory comments, and personal or political attacks * Public or private harassment -* Publishing others’ private information, such as a physical or email +* Publishing others' private information, such as a physical or email address, without their explicit permission * Other conduct which could reasonably be considered inappropriate in a 
professional setting @@ -89,7 +89,7 @@ Attribution This Code of Conduct is adapted from the Contributor Covenant, version 2.0, available at https://www.contributor-covenant.org/version/2/0/code_of_conduct.html. -Community Impact Guidelines were inspired by Mozilla’s code of conduct +Community Impact Guidelines were inspired by Mozilla's code of conduct enforcement ladder. For answers to common questions about this code of conduct, see the FAQ at https://www.contributor-covenant.org/faq. Translations are available at diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 56b71834d6b..4ae73e3b338 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -1,20 +1,82 @@ -Note: by contributing code to the Redis project in any form, including sending -a pull request via Github, a code fragment or patch via private email or -public discussion groups, you agree to release your code under the terms -of the BSD license that you can find in the COPYING file included in the Redis -source distribution. You will include BSD license in the COPYING file within -each source file that you contribute. +By contributing code to the Redis project in any form you agree to the Redis Software Grant and +Contributor License Agreement attached below. Only contributions made under the Redis Software Grant +and Contributor License Agreement may be accepted by Redis, and any contribution is subject to the +terms of the Redis dual-license under RSALv2/SSPLv1 as described in the LICENSE.txt file included in +the Redis source distribution. + +# REDIS SOFTWARE GRANT AND CONTRIBUTOR LICENSE AGREEMENT + +To specify the intellectual property license granted in any Contribution, Redis Ltd., ("**Redis**") +requires a Software Grant and Contributor License Agreement ("**Agreement**"). This Agreement is for +your protection as a contributor as well as the protection of Redis and its users; it does not +change your rights to use your own Contribution for any other purpose. 
+ +By making any Contribution, You accept and agree to the following terms and conditions for the +Contribution. Except for the license granted in this Agreement to Redis and the recipients of the +software distributed by Redis, You reserve all right, title, and interest in and to Your +Contribution. + +1. **Definitions** + + 1.1. "**You**" (or "**Your**") means the copyright owner or legal entity authorized by the + copyright owner that is entering into this Agreement with Redis. For legal entities, the entity + making a Contribution and all other entities that Control, are Controlled by, or are under + common Control with that entity are considered to be a single contributor. For the purposes of + this definition, "**Control**" means (i) the power, direct or indirect, to cause the direction + or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty + percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. + + 1.2. "**Contribution**" means the code, documentation, or any original work of authorship, + including any modifications or additions to an existing work described above. + +2. "**Work**" means any software project stewarded by Redis. + +3. **Grant of Copyright License**. Subject to the terms and conditions of this Agreement, You grant + to Redis and to the recipients of the software distributed by Redis a perpetual, worldwide, + non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare + derivative works of, publicly display, publicly perform, sublicense, and distribute Your + Contribution and such derivative works. + +4. **Grant of Patent License**. 
Subject to the terms and conditions of this Agreement, You grant to + Redis and to the recipients of the software distributed by Redis a perpetual, worldwide, + non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent + license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable by You that are necessarily + infringed by Your Contribution alone or by a combination of Your Contribution with the Work to + which such Contribution was submitted. If any entity institutes patent litigation against You or + any other entity (including a cross-claim or counterclaim in a lawsuit) alleging that your + Contribution, or the Work to which you have contributed, constitutes a direct or contributory + patent infringement, then any patent licenses granted to the claimant entity under this Agreement + for that Contribution or Work terminate as of the date such litigation is filed. + +5. **Representations and Warranties**. You represent and warrant that: (i) You are legally entitled + to grant the above licenses; and (ii) if You are an entity, each employee or agent designated by + You is authorized to submit the Contribution on behalf of You; and (iii) your Contribution is + Your original work, and that it will not infringe on any third party's intellectual property + right(s). + +6. **Disclaimer**. You are not expected to provide support for Your Contribution, except to the + extent You desire to provide support. You may provide support for free, for a fee, or not at all. + Unless required by applicable law or agreed to in writing, You provide Your Contribution on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, + including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, + MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. + +7. **Enforceability**. 
Nothing in this Agreement will be construed as creating any joint venture, + employment relationship, or partnership between You and Redis. If any provision of this Agreement + is held to be unenforceable, the remaining provisions of this Agreement will not be affected. + This represents the entire agreement between You and Redis relating to the Contribution. # IMPORTANT: HOW TO USE REDIS GITHUB ISSUES -Github issues SHOULD ONLY BE USED to report bugs, and for DETAILED feature -requests. Everything else belongs to the Redis Google Group: +GitHub issues SHOULD ONLY BE USED to report bugs and for DETAILED feature +requests. Everything else should be asked on Discord: - https://groups.google.com/forum/m/#!forum/Redis-db + https://discord.com/invite/redis PLEASE DO NOT POST GENERAL QUESTIONS that are not about bugs or suspected -bugs in the Github issues system. We'll be very happy to help you and provide -all the support in the mailing list. +bugs in the GitHub issues system. We'll be delighted to help you and provide +all the support on Discord. There is also an active community of Redis users at Stack Overflow: @@ -33,24 +95,24 @@ straight away: if your feature is not a conceptual fit you'll lose a lot of time writing the code without any reason. Start by posting in the mailing list and creating an issue at Github with the description of, exactly, what you want to accomplish and why. Use cases are important for features to be accepted. -Here you'll see if there is consensus about your idea. +Here you can see if there is consensus about your idea. 2. If in step 1 you get an acknowledgment from the project leaders, use the following procedure to submit a patch: - a. Fork Redis on github ( https://docs.github.com/en/github/getting-started-with-github/fork-a-repo ) + a. Fork Redis on GitHub ( https://docs.github.com/en/github/getting-started-with-github/fork-a-repo ) b. Create a topic branch (git checkout -b my_branch) c. 
Push to your branch (git push origin my_branch) - d. Initiate a pull request on github ( https://docs.github.com/en/github/collaborating-with-issues-and-pull-requests/creating-a-pull-request ) + d. Initiate a pull request on GitHub ( https://docs.github.com/en/github/collaborating-with-issues-and-pull-requests/creating-a-pull-request ) e. Done :) 3. Keep in mind that we are very overloaded, so issues and PRs sometimes wait -for a *very* long time. However this is not lack of interest, as the project +for a *very* long time. However this is not a lack of interest, as the project gets more and more users, we find ourselves in a constant need to prioritize certain issues/PRs over others. If you think your issue/PR is very important try to popularize it, have other users commenting and sharing their point of -view and so forth. This helps. +view, and so forth. This helps. -4. For minor fixes just open a pull request on Github. +4. For minor fixes - open a pull request on GitHub. -Thanks! +Additional information on the RSALv2/SSPLv1 dual-license is also found in the LICENSE.txt file. diff --git a/COPYING b/COPYING deleted file mode 100644 index a381681a1c2..00000000000 --- a/COPYING +++ /dev/null @@ -1,10 +0,0 @@ -Copyright (c) 2006-2020, Salvatore Sanfilippo -All rights reserved. - -Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - - * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - * Neither the name of Redis nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
- -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/LICENSE.txt b/LICENSE.txt new file mode 100644 index 00000000000..a60c2460490 --- /dev/null +++ b/LICENSE.txt @@ -0,0 +1,733 @@ +Starting on March 20th, 2024, Redis follows a dual-licensing model with all Redis project code +contributions under version 7.4 and subsequent releases governed by the Redis Software Grant and +Contributor License Agreement. After this date, contributions are subject to the user's choice of +the Redis Source Available License v2 (RSALv2) or the Server Side Public License v1 (SSPLv1), as +follows: + + +1. Redis Source Available License 2.0 (RSALv2) Agreement +======================================================== + +Last Update: December 30, 2023 + +Acceptance +---------- + +This Agreement sets forth the terms and conditions on which the Licensor +makes available the Software. By installing, downloading, accessing, +Using, or distributing any of the Software, You agree to all of the +terms and conditions of this Agreement. + +If You are receiving the Software on behalf of Your Company, You +represent and warrant that You have the authority to agree to this +Agreement on behalf of such entity. + +The Licensor reserves the right to update this Agreement from time to +time. 
+ +The terms below have the meanings set forth below for purposes of this +Agreement: + +Definitions +----------- + +Agreement: this Redis Source Available License 2.0 Agreement. + +Control: ownership, directly or indirectly, of substantially all the +assets of an entity, or the power to direct its management and policies +by vote, contract, or otherwise. + +License: the License as described in the License paragraph below. + +Licensor: the entity offering these terms, which includes Redis Ltd. on +behalf of itself and its subsidiaries and affiliates worldwide. + +Modify, Modified, or Modification: copy from or adapt all or part of the +work in a fashion requiring copyright permission other than making an +exact copy. The resulting work is called a Modified version of the +earlier work. + +Redis: the Redis software as described in redis.com redis.io. + +Software: certain Software components designed to work with Redis and +provided to You under this Agreement. + +Trademark: the trademarks, service marks, and any other similar rights. + +Use: anything You do with the Software requiring one of Your Licenses. + +You: the recipient of the Software, the individual or entity on whose +behalf You are agreeing to this Agreement. + +Your Company: any legal entity, sole proprietorship, or other kind of +organization that You work for, plus all organizations that have control +over, are under the control of, or are under common control with that +organization. + +Your Licenses: means all the Licenses granted to You for the Software +under this Agreement. + +License +------- + +The Licensor grants You a non-exclusive, royalty-free, worldwide, +non-sublicensable, non-transferable license to use, copy, distribute, +make available, and prepare derivative works of the Software, in each +case subject to the limitations and conditions below. 
+ +Limitations +----------- + +You may not make the functionality of the Software or a Modified version +available to third parties as a service or distribute the Software or a +Modified version in a manner that makes the functionality of the +Software available to third parties. + +Making the functionality of the Software or Modified version available +to third parties includes, without limitation, enabling third parties to +interact with the functionality of the Software or Modified version in +distributed form or remotely through a computer network, offering a +product or service, the value of which entirely or primarily derives +from the value of the Software or Modified version, or offering a +product or service that accomplishes for users the primary purpose of +the Software or Modified version. + +You may not alter, remove, or obscure any licensing, copyright, or other +notices of the Licensor in the Software. Any use of the Licensor's +Trademarks is subject to applicable law. + +Patents +------- + +The Licensor grants You a License, under any patent claims the Licensor +can License, or becomes able to License, to make, have made, use, sell, +offer for sale, import and have imported the Software, in each case +subject to the limitations and conditions in this License. This License +does not cover any patent claims that You cause to be infringed by +Modifications or additions to the Software. If You or Your Company make +any written claim that the Software infringes or contributes to +infringement of any patent, your patent License for the Software granted +under this Agreement ends immediately. If Your Company makes such a +claim, your patent License ends immediately for work on behalf of Your +Company. + +Notices +------- + +You must ensure that anyone who gets a copy of any part of the Software +from You also gets a copy of the terms and conditions in this Agreement. 
+ +If You modify the Software, You must include in any Modified copies of +the Software prominent notices stating that You have Modified the +Software. + +No Other Rights +--------------- + +The terms and conditions of this Agreement do not imply any Licenses +other than those expressly granted in this Agreement. + +Termination +----------- + +If You Use the Software in violation of this Agreement, such Use is not +Licensed, and Your Licenses will automatically terminate. If the +Licensor provides You with a notice of your violation, and You cease all +violations of this License no later than 30 days after You receive that +notice, Your Licenses will be reinstated retroactively. However, if You +violate this Agreement after such reinstatement, any additional +violation of this Agreement will cause your Licenses to terminate +automatically and permanently. + +No Liability +------------ + +As far as the law allows, the Software comes as is, without any +warranty or condition, and the Licensor will not be liable to You for +any damages arising out of this Agreement or the Use or nature of the +Software, under any kind of legal claim. + +Governing Law and Jurisdiction +------------------------------ + +If You are located in Asia, Pacific, Americas, or other jurisdictions +not listed below, the Agreement will be construed and enforced in all +respects in accordance with the laws of the State of California, U.S.A., +without reference to its choice of law rules. The courts located in the +County of Santa Clara, California, have exclusive jurisdiction for all +purposes relating to this Agreement. + +If You are located in Israel, the Agreement will be construed and +enforced in all respects in accordance with the laws of the State of +Israel without reference to its choice of law rules. The courts located +in the Central District of the State of Israel have exclusive +jurisdiction for all purposes relating to this Agreement. 
+ +If You are located in Europe, United Kingdom, Middle East or Africa, the +Agreement will be construed and enforced in all respects in accordance +with the laws of England and Wales without reference to its choice of +law rules. The competent courts located in London, England, have +exclusive jurisdiction for all purposes relating to this Agreement. + + + +2. Server Side Public License (SSPL) +==================================== + + Server Side Public License + VERSION 1, OCTOBER 16, 2018 + + Copyright (c) 2018 MongoDB, Inc. + + Everyone is permitted to copy and distribute verbatim copies of this + license document, but changing it is not allowed. + + TERMS AND CONDITIONS + + 0. Definitions. + + "This License" refers to Server Side Public License. + + "Copyright" also means copyright-like laws that apply to other kinds of + works, such as semiconductor masks. + + "The Program" refers to any copyrightable work licensed under this + License. Each licensee is addressed as "you". "Licensees" and + "recipients" may be individuals or organizations. + + To "modify" a work means to copy from or adapt all or part of the work in + a fashion requiring copyright permission, other than the making of an + exact copy. The resulting work is called a "modified version" of the + earlier work or a work "based on" the earlier work. + + A "covered work" means either the unmodified Program or a work based on + the Program. + + To "propagate" a work means to do anything with it that, without + permission, would make you directly or secondarily liable for + infringement under applicable copyright law, except executing it on a + computer or modifying a private copy. Propagation includes copying, + distribution (with or without modification), making available to the + public, and in some countries other activities as well. + + To "convey" a work means any kind of propagation that enables other + parties to make or receive copies. 
Mere interaction with a user through a + computer network, with no transfer of a copy, is not conveying. + + An interactive user interface displays "Appropriate Legal Notices" to the + extent that it includes a convenient and prominently visible feature that + (1) displays an appropriate copyright notice, and (2) tells the user that + there is no warranty for the work (except to the extent that warranties + are provided), that licensees may convey the work under this License, and + how to view a copy of this License. If the interface presents a list of + user commands or options, such as a menu, a prominent item in the list + meets this criterion. + + 1. Source Code. + + The "source code" for a work means the preferred form of the work for + making modifications to it. "Object code" means any non-source form of a + work. + + A "Standard Interface" means an interface that either is an official + standard defined by a recognized standards body, or, in the case of + interfaces specified for a particular programming language, one that is + widely used among developers working in that language. The "System + Libraries" of an executable work include anything, other than the work as + a whole, that (a) is included in the normal form of packaging a Major + Component, but which is not part of that Major Component, and (b) serves + only to enable use of the work with that Major Component, or to implement + a Standard Interface for which an implementation is available to the + public in source code form. A "Major Component", in this context, means a + major essential component (kernel, window system, and so on) of the + specific operating system (if any) on which the executable work runs, or + a compiler used to produce the work, or an object code interpreter used + to run it. 
+ + The "Corresponding Source" for a work in object code form means all the + source code needed to generate, install, and (for an executable work) run + the object code and to modify the work, including scripts to control + those activities. However, it does not include the work's System + Libraries, or general-purpose tools or generally available free programs + which are used unmodified in performing those activities but which are + not part of the work. For example, Corresponding Source includes + interface definition files associated with source files for the work, and + the source code for shared libraries and dynamically linked subprograms + that the work is specifically designed to require, such as by intimate + data communication or control flow between those subprograms and other + parts of the work. + + The Corresponding Source need not include anything that users can + regenerate automatically from other parts of the Corresponding Source. + + The Corresponding Source for a work in source code form is that same work. + + 2. Basic Permissions. + + All rights granted under this License are granted for the term of + copyright on the Program, and are irrevocable provided the stated + conditions are met. This License explicitly affirms your unlimited + permission to run the unmodified Program, subject to section 13. The + output from running a covered work is covered by this License only if the + output, given its content, constitutes a covered work. This License + acknowledges your rights of fair use or other equivalent, as provided by + copyright law. Subject to section 13, you may make, run and propagate + covered works that you do not convey, without conditions so long as your + license otherwise remains in force. 
You may convey covered works to + others for the sole purpose of having them make modifications exclusively + for you, or provide you with facilities for running those works, provided + that you comply with the terms of this License in conveying all + material for which you do not control copyright. Those thus making or + running the covered works for you must do so exclusively on your + behalf, under your direction and control, on terms that prohibit them + from making any copies of your copyrighted material outside their + relationship with you. + + Conveying under any other circumstances is permitted solely under the + conditions stated below. Sublicensing is not allowed; section 10 makes it + unnecessary. + + 3. Protecting Users' Legal Rights From Anti-Circumvention Law. + + No covered work shall be deemed part of an effective technological + measure under any applicable law fulfilling obligations under article 11 + of the WIPO copyright treaty adopted on 20 December 1996, or similar laws + prohibiting or restricting circumvention of such measures. + + When you convey a covered work, you waive any legal power to forbid + circumvention of technological measures to the extent such circumvention is + effected by exercising rights under this License with respect to the + covered work, and you disclaim any intention to limit operation or + modification of the work as a means of enforcing, against the work's users, + your or third parties' legal rights to forbid circumvention of + technological measures. + + 4. Conveying Verbatim Copies. 
+ + You may convey verbatim copies of the Program's source code as you + receive it, in any medium, provided that you conspicuously and + appropriately publish on each copy an appropriate copyright notice; keep + intact all notices stating that this License and any non-permissive terms + added in accord with section 7 apply to the code; keep intact all notices + of the absence of any warranty; and give all recipients a copy of this + License along with the Program. You may charge any price or no price for + each copy that you convey, and you may offer support or warranty + protection for a fee. + + 5. Conveying Modified Source Versions. + + You may convey a work based on the Program, or the modifications to + produce it from the Program, in the form of source code under the terms + of section 4, provided that you also meet all of these conditions: + + a) The work must carry prominent notices stating that you modified it, + and giving a relevant date. + + b) The work must carry prominent notices stating that it is released + under this License and any conditions added under section 7. This + requirement modifies the requirement in section 4 to "keep intact all + notices". + + c) You must license the entire work, as a whole, under this License to + anyone who comes into possession of a copy. This License will therefore + apply, along with any applicable section 7 additional terms, to the + whole of the work, and all its parts, regardless of how they are + packaged. This License gives no permission to license the work in any + other way, but it does not invalidate such permission if you have + separately received it. + + d) If the work has interactive user interfaces, each must display + Appropriate Legal Notices; however, if the Program has interactive + interfaces that do not display Appropriate Legal Notices, your work + need not make them do so. 
+ + A compilation of a covered work with other separate and independent + works, which are not by their nature extensions of the covered work, and + which are not combined with it such as to form a larger program, in or on + a volume of a storage or distribution medium, is called an "aggregate" if + the compilation and its resulting copyright are not used to limit the + access or legal rights of the compilation's users beyond what the + individual works permit. Inclusion of a covered work in an aggregate does + not cause this License to apply to the other parts of the aggregate. + + 6. Conveying Non-Source Forms. + + You may convey a covered work in object code form under the terms of + sections 4 and 5, provided that you also convey the machine-readable + Corresponding Source under the terms of this License, in one of these + ways: + + a) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by the + Corresponding Source fixed on a durable physical medium customarily + used for software interchange. + + b) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by a written + offer, valid for at least three years and valid for as long as you + offer spare parts or customer support for that product model, to give + anyone who possesses the object code either (1) a copy of the + Corresponding Source for all the software in the product that is + covered by this License, on a durable physical medium customarily used + for software interchange, for a price no more than your reasonable cost + of physically performing this conveying of source, or (2) access to + copy the Corresponding Source from a network server at no charge. + + c) Convey individual copies of the object code with a copy of the + written offer to provide the Corresponding Source. 
This alternative is + allowed only occasionally and noncommercially, and only if you received + the object code with such an offer, in accord with subsection 6b. + + d) Convey the object code by offering access from a designated place + (gratis or for a charge), and offer equivalent access to the + Corresponding Source in the same way through the same place at no + further charge. You need not require recipients to copy the + Corresponding Source along with the object code. If the place to copy + the object code is a network server, the Corresponding Source may be on + a different server (operated by you or a third party) that supports + equivalent copying facilities, provided you maintain clear directions + next to the object code saying where to find the Corresponding Source. + Regardless of what server hosts the Corresponding Source, you remain + obligated to ensure that it is available for as long as needed to + satisfy these requirements. + + e) Convey the object code using peer-to-peer transmission, provided you + inform other peers where the object code and Corresponding Source of + the work are being offered to the general public at no charge under + subsection 6d. + + A separable portion of the object code, whose source code is excluded + from the Corresponding Source as a System Library, need not be included + in conveying the object code work. + + A "User Product" is either (1) a "consumer product", which means any + tangible personal property which is normally used for personal, family, + or household purposes, or (2) anything designed or sold for incorporation + into a dwelling. In determining whether a product is a consumer product, + doubtful cases shall be resolved in favor of coverage. 
For a particular + product received by a particular user, "normally used" refers to a + typical or common use of that class of product, regardless of the status + of the particular user or of the way in which the particular user + actually uses, or expects or is expected to use, the product. A product + is a consumer product regardless of whether the product has substantial + commercial, industrial or non-consumer uses, unless such uses represent + the only significant mode of use of the product. + + "Installation Information" for a User Product means any methods, + procedures, authorization keys, or other information required to install + and execute modified versions of a covered work in that User Product from + a modified version of its Corresponding Source. The information must + suffice to ensure that the continued functioning of the modified object + code is in no case prevented or interfered with solely because + modification has been made. + + If you convey an object code work under this section in, or with, or + specifically for use in, a User Product, and the conveying occurs as part + of a transaction in which the right of possession and use of the User + Product is transferred to the recipient in perpetuity or for a fixed term + (regardless of how the transaction is characterized), the Corresponding + Source conveyed under this section must be accompanied by the + Installation Information. But this requirement does not apply if neither + you nor any third party retains the ability to install modified object + code on the User Product (for example, the work has been installed in + ROM). + + The requirement to provide Installation Information does not include a + requirement to continue to provide support service, warranty, or updates + for a work that has been modified or installed by the recipient, or for + the User Product in which it has been modified or installed. 
Access + to a network may be denied when the modification itself materially + and adversely affects the operation of the network or violates the + rules and protocols for communication across the network. + + Corresponding Source conveyed, and Installation Information provided, in + accord with this section must be in a format that is publicly documented + (and with an implementation available to the public in source code form), + and must require no special password or key for unpacking, reading or + copying. + + 7. Additional Terms. + + "Additional permissions" are terms that supplement the terms of this + License by making exceptions from one or more of its conditions. + Additional permissions that are applicable to the entire Program shall be + treated as though they were included in this License, to the extent that + they are valid under applicable law. If additional permissions apply only + to part of the Program, that part may be used separately under those + permissions, but the entire Program remains governed by this License + without regard to the additional permissions. When you convey a copy of + a covered work, you may at your option remove any additional permissions + from that copy, or from any part of it. (Additional permissions may be + written to require their own removal in certain cases when you modify the + work.) You may place additional permissions on material, added by you to + a covered work, for which you have or can give appropriate copyright + permission. 
+ + Notwithstanding any other provision of this License, for material you add + to a covered work, you may (if authorized by the copyright holders of + that material) supplement the terms of this License with terms: + + a) Disclaiming warranty or limiting liability differently from the + terms of sections 15 and 16 of this License; or + + b) Requiring preservation of specified reasonable legal notices or + author attributions in that material or in the Appropriate Legal + Notices displayed by works containing it; or + + c) Prohibiting misrepresentation of the origin of that material, or + requiring that modified versions of such material be marked in + reasonable ways as different from the original version; or + + d) Limiting the use for publicity purposes of names of licensors or + authors of the material; or + + e) Declining to grant rights under trademark law for use of some trade + names, trademarks, or service marks; or + + f) Requiring indemnification of licensors and authors of that material + by anyone who conveys the material (or modified versions of it) with + contractual assumptions of liability to the recipient, for any + liability that these contractual assumptions directly impose on those + licensors and authors. + + All other non-permissive additional terms are considered "further + restrictions" within the meaning of section 10. If the Program as you + received it, or any part of it, contains a notice stating that it is + governed by this License along with a term that is a further restriction, + you may remove that term. If a license document contains a further + restriction but permits relicensing or conveying under this License, you + may add to a covered work material governed by the terms of that license + document, provided that the further restriction does not survive such + relicensing or conveying. 
+ + If you add terms to a covered work in accord with this section, you must + place, in the relevant source files, a statement of the additional terms + that apply to those files, or a notice indicating where to find the + applicable terms. Additional terms, permissive or non-permissive, may be + stated in the form of a separately written license, or stated as + exceptions; the above requirements apply either way. + + 8. Termination. + + You may not propagate or modify a covered work except as expressly + provided under this License. Any attempt otherwise to propagate or modify + it is void, and will automatically terminate your rights under this + License (including any patent licenses granted under the third paragraph + of section 11). + + However, if you cease all violation of this License, then your license + from a particular copyright holder is reinstated (a) provisionally, + unless and until the copyright holder explicitly and finally terminates + your license, and (b) permanently, if the copyright holder fails to + notify you of the violation by some reasonable means prior to 60 days + after the cessation. + + Moreover, your license from a particular copyright holder is reinstated + permanently if the copyright holder notifies you of the violation by some + reasonable means, this is the first time you have received notice of + violation of this License (for any work) from that copyright holder, and + you cure the violation prior to 30 days after your receipt of the notice. + + Termination of your rights under this section does not terminate the + licenses of parties who have received copies or rights from you under + this License. If your rights have been terminated and not permanently + reinstated, you do not qualify to receive new licenses for the same + material under section 10. + + 9. Acceptance Not Required for Having Copies. + + You are not required to accept this License in order to receive or run a + copy of the Program. 
Ancillary propagation of a covered work occurring + solely as a consequence of using peer-to-peer transmission to receive a + copy likewise does not require acceptance. However, nothing other than + this License grants you permission to propagate or modify any covered + work. These actions infringe copyright if you do not accept this License. + Therefore, by modifying or propagating a covered work, you indicate your + acceptance of this License to do so. + + 10. Automatic Licensing of Downstream Recipients. + + Each time you convey a covered work, the recipient automatically receives + a license from the original licensors, to run, modify and propagate that + work, subject to this License. You are not responsible for enforcing + compliance by third parties with this License. + + An "entity transaction" is a transaction transferring control of an + organization, or substantially all assets of one, or subdividing an + organization, or merging organizations. If propagation of a covered work + results from an entity transaction, each party to that transaction who + receives a copy of the work also receives whatever licenses to the work + the party's predecessor in interest had or could give under the previous + paragraph, plus a right to possession of the Corresponding Source of the + work from the predecessor in interest, if the predecessor has it or can + get it with reasonable efforts. + + You may not impose any further restrictions on the exercise of the rights + granted or affirmed under this License. For example, you may not impose a + license fee, royalty, or other charge for exercise of rights granted + under this License, and you may not initiate litigation (including a + cross-claim or counterclaim in a lawsuit) alleging that any patent claim + is infringed by making, using, selling, offering for sale, or importing + the Program or any portion of it. + + 11. Patents. 
+ + A "contributor" is a copyright holder who authorizes use under this + License of the Program or a work on which the Program is based. The work + thus licensed is called the contributor's "contributor version". + + A contributor's "essential patent claims" are all patent claims owned or + controlled by the contributor, whether already acquired or hereafter + acquired, that would be infringed by some manner, permitted by this + License, of making, using, or selling its contributor version, but do not + include claims that would be infringed only as a consequence of further + modification of the contributor version. For purposes of this definition, + "control" includes the right to grant patent sublicenses in a manner + consistent with the requirements of this License. + + Each contributor grants you a non-exclusive, worldwide, royalty-free + patent license under the contributor's essential patent claims, to make, + use, sell, offer for sale, import and otherwise run, modify and propagate + the contents of its contributor version. + + In the following three paragraphs, a "patent license" is any express + agreement or commitment, however denominated, not to enforce a patent + (such as an express permission to practice a patent or covenant not to + sue for patent infringement). To "grant" such a patent license to a party + means to make such an agreement or commitment not to enforce a patent + against the party. 
+ + If you convey a covered work, knowingly relying on a patent license, and + the Corresponding Source of the work is not available for anyone to copy, + free of charge and under the terms of this License, through a publicly + available network server or other readily accessible means, then you must + either (1) cause the Corresponding Source to be so available, or (2) + arrange to deprive yourself of the benefit of the patent license for this + particular work, or (3) arrange, in a manner consistent with the + requirements of this License, to extend the patent license to downstream + recipients. "Knowingly relying" means you have actual knowledge that, but + for the patent license, your conveying the covered work in a country, or + your recipient's use of the covered work in a country, would infringe + one or more identifiable patents in that country that you have reason + to believe are valid. + + If, pursuant to or in connection with a single transaction or + arrangement, you convey, or propagate by procuring conveyance of, a + covered work, and grant a patent license to some of the parties receiving + the covered work authorizing them to use, propagate, modify or convey a + specific copy of the covered work, then the patent license you grant is + automatically extended to all recipients of the covered work and works + based on it. + + A patent license is "discriminatory" if it does not include within the + scope of its coverage, prohibits the exercise of, or is conditioned on + the non-exercise of one or more of the rights that are specifically + granted under this License. 
You may not convey a covered work if you are + a party to an arrangement with a third party that is in the business of + distributing software, under which you make payment to the third party + based on the extent of your activity of conveying the work, and under + which the third party grants, to any of the parties who would receive the + covered work from you, a discriminatory patent license (a) in connection + with copies of the covered work conveyed by you (or copies made from + those copies), or (b) primarily for and in connection with specific + products or compilations that contain the covered work, unless you + entered into that arrangement, or that patent license was granted, prior + to 28 March 2007. + + Nothing in this License shall be construed as excluding or limiting any + implied license or other defenses to infringement that may otherwise be + available to you under applicable patent law. + + 12. No Surrender of Others' Freedom. + + If conditions are imposed on you (whether by court order, agreement or + otherwise) that contradict the conditions of this License, they do not + excuse you from the conditions of this License. If you cannot use, + propagate or convey a covered work so as to satisfy simultaneously your + obligations under this License and any other pertinent obligations, then + as a consequence you may not use, propagate or convey it at all. For + example, if you agree to terms that obligate you to collect a royalty for + further conveying from those to whom you convey the Program, the only way + you could satisfy both those terms and this License would be to refrain + entirely from conveying the Program. + + 13. Offering the Program as a Service. + + If you make the functionality of the Program or a modified version + available to third parties as a service, you must make the Service Source + Code available via network download to everyone at no charge, under the + terms of this License. 
Making the functionality of the Program or + modified version available to third parties as a service includes, + without limitation, enabling third parties to interact with the + functionality of the Program or modified version remotely through a + computer network, offering a service the value of which entirely or + primarily derives from the value of the Program or modified version, or + offering a service that accomplishes for users the primary purpose of the + Program or modified version. + + "Service Source Code" means the Corresponding Source for the Program or + the modified version, and the Corresponding Source for all programs that + you use to make the Program or modified version available as a service, + including, without limitation, management software, user interfaces, + application program interfaces, automation software, monitoring software, + backup software, storage software and hosting software, all such that a + user could run an instance of the service using the Service Source Code + you make available. + + 14. Revised Versions of this License. + + MongoDB, Inc. may publish revised and/or new versions of the Server Side + Public License from time to time. Such new versions will be similar in + spirit to the present version, but may differ in detail to address new + problems or concerns. + + Each version is given a distinguishing version number. If the Program + specifies that a certain numbered version of the Server Side Public + License "or any later version" applies to it, you have the option of + following the terms and conditions either of that numbered version or of + any later version published by MongoDB, Inc. If the Program does not + specify a version number of the Server Side Public License, you may + choose any version ever published by MongoDB, Inc. 
+ + If the Program specifies that a proxy can decide which future versions of + the Server Side Public License can be used, that proxy's public statement + of acceptance of a version permanently authorizes you to choose that + version for the Program. + + Later license versions may give you additional or different permissions. + However, no additional obligations are imposed on any author or copyright + holder as a result of your choosing to follow a later version. + + 15. Disclaimer of Warranty. + + THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY + APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT + HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY + OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, + THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM + IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF + ALL NECESSARY SERVICING, REPAIR OR CORRECTION. + + 16. Limitation of Liability. + + IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING + WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS + THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING + ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF + THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO + LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU + OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER + PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE + POSSIBILITY OF SUCH DAMAGES. + + 17. Interpretation of Sections 15 and 16. 
+ + If the disclaimer of warranty and limitation of liability provided above + cannot be given local legal effect according to their terms, reviewing + courts shall apply local law that most closely approximates an absolute + waiver of all civil liability in connection with the Program, unless a + warranty or assumption of liability accompanies a copy of the Program in + return for a fee. + + END OF TERMS AND CONDITIONS diff --git a/README.md b/README.md index 4a1ce791492..ff483f9e1d7 100644 --- a/README.md +++ b/README.md @@ -217,18 +217,28 @@ You'll be able to stop and start Redis using the script named Code contributions ----------------- -Note: By contributing code to the Redis project in any form, including sending -a pull request via Github, a code fragment or patch via private email or -public discussion groups, you agree to release your code under the terms -of the BSD license that you can find in the [COPYING][1] file included in the Redis -source distribution. +By contributing code to the Redis project in any form, including sending a pull request via GitHub, +a code fragment or patch via private email or public discussion groups, you agree to release your +code under the terms of the [Redis Software Grant and Contributor License Agreement][1]. Redis software +contains contributions to the original Redis core project, which are owned by their contributors and +licensed under the 3BSD license. Any copy of that license in this repository applies only to those +contributions. Redis releases all Redis project versions from 7.4.x and thereafter under the +RSALv2/SSPL dual-license as described in the [LICENSE.txt][2] file included in the Redis source distribution. + +Please see the [CONTRIBUTING.md][1] file in this source distribution for more information. For +security bugs and vulnerabilities, please see [SECURITY.md][3]. 
+ +[1]: https://github.com/redis/redis/blob/unstable/CONTRIBUTING.md +[2]: https://github.com/redis/redis/blob/unstable/LICENSE.txt +[3]: https://github.com/redis/redis/blob/unstable/SECURITY.md -Please see the [CONTRIBUTING.md][2] file in this source distribution for more -information. For security bugs and vulnerabilities, please see [SECURITY.md][3]. +Redis Trademarks +---------------- -[1]: https://github.com/redis/redis/blob/unstable/COPYING -[2]: https://github.com/redis/redis/blob/unstable/CONTRIBUTING.md -[3]: https://github.com/redis/redis/blob/unstable/SECURITY.md +The purpose of a trademark is to identify the goods and services of a person or company without +causing confusion. As the registered owner of its name and logo, Redis accepts certain limited uses +of its trademarks but it has requirements that must be followed as described in its Trademark +Guidelines available at: https://redis.com/legal/trademark-guidelines/. Redis internals === @@ -420,7 +430,7 @@ implementations are the following: * `lookupKeyRead()` and `lookupKeyWrite()` are used in order to get a pointer to the value associated to a given key, or `NULL` if the key does not exist. * `dbAdd()` and its higher level counterpart `setKey()` create a new key in a Redis database. * `dbDelete()` removes a key and its associated value. -* `emptyDb()` removes an entire single database or all the databases defined. +* `emptyData()` removes an entire single database or all the databases defined. The rest of the file implements the generic commands exposed to the client. @@ -458,9 +468,9 @@ Script The script unit is composed of 3 units: * `script.c` - integration of scripts with Redis (commands execution, set replication/resp, ...) -* `script_lua.c` - responsible to execute Lua code, uses script.c to interact with Redis from within the Lua code. -* `function_lua.c` - contains the Lua engine implementation, uses script_lua.c to execute the Lua code. 
-* `functions.c` - contains Redis Functions implementation (FUNCTION command), uses functions_lua.c if the function it wants to invoke needs the Lua engine. +* `script_lua.c` - responsible to execute Lua code, uses `script.c` to interact with Redis from within the Lua code. +* `function_lua.c` - contains the Lua engine implementation, uses `script_lua.c` to execute the Lua code. +* `functions.c` - contains Redis Functions implementation (`FUNCTION` command), uses `functions_lua.c` if the function it wants to invoke needs the Lua engine. * `eval.c` - contains the `eval` implementation using `script_lua.c` to invoke the Lua code. diff --git a/REDISCONTRIBUTIONS.txt b/REDISCONTRIBUTIONS.txt new file mode 100644 index 00000000000..9a98f950e80 --- /dev/null +++ b/REDISCONTRIBUTIONS.txt @@ -0,0 +1,30 @@ +Copyright (c) 2006-Present, Redis Ltd. and Contributors +All rights reserved. + +Note: Continued Applicability of the BSD-3-Clause License + +Despite the shift to the dual-licensing model with Redis version 7.4 (RSALv2 or SSPLv1), portions of +Redis remain available subject to the BSD-3-Clause License (BSD). See below for the full BSD +license: + +Redistribution and use in source and binary forms, with or without modification, are permitted +provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, this list of conditions +and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions +and the following disclaimer in the documentation and/or other materials provided with the +distribution. + +3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse +or promote products derived from this software without specific prior written permission. 
+ +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR +IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND +FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR +CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER +IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF +THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/SECURITY.md b/SECURITY.md index ea66aaf65e9..5c348319dbe 100644 --- a/SECURITY.md +++ b/SECURITY.md @@ -11,17 +11,17 @@ unless this is not possible or feasible with a reasonable effort. | Version | Supported | | ------- | ------------------ | +| 7.2.x | :white_check_mark: | | 7.0.x | :white_check_mark: | | 6.2.x | :white_check_mark: | -| 6.0.x | :white_check_mark: | -| < 6.0 | :x: | +| < 6.2 | :x: | ## Reporting a Vulnerability -If you believe you’ve discovered a serious vulnerability, please contact the +If you believe you've discovered a serious vulnerability, please contact the Redis core team at redis@redis.io. We will evaluate your report and if necessary issue a fix and an advisory. If the issue was previously undisclosed, -we’ll also mention your name in the credits. +we'll also mention your name in the credits. ## Responsible Disclosure @@ -36,7 +36,7 @@ embargo on public disclosure. 
Vendors on the list are individuals or organizations that maintain Redis distributions or provide Redis as a service, who have third party users who -will benefit from the vendor’s ability to prepare for a new version or deploy a +will benefit from the vendor's ability to prepare for a new version or deploy a fix early. If you believe you should be on the list, please contact us and we will diff --git a/deps/Makefile b/deps/Makefile index c03c79790cb..3bf0363d5c2 100644 --- a/deps/Makefile +++ b/deps/Makefile @@ -3,6 +3,7 @@ uname_S:= $(shell sh -c 'uname -s 2>/dev/null || echo not') LUA_DEBUG?=no +LUA_COVERAGE?=no CCCOLOR="\033[34m" LINKCOLOR="\033[34;1m" @@ -85,6 +86,11 @@ ifeq ($(LUA_DEBUG),yes) else LUA_CFLAGS+= -O2 endif +ifeq ($(LUA_COVERAGE),yes) + LUA_CFLAGS += -fprofile-arcs -ftest-coverage + LUA_LDFLAGS += -fprofile-arcs -ftest-coverage +endif + # lua's Makefile defines AR="ar rcu", which is unusual, and makes it more # challenging to cross-compile lua (and redis). These defines make it easier # to fit redis into cross-compilation environments, which typically set AR. diff --git a/deps/README.md b/deps/README.md index 6d434c21f3c..8da051baa79 100644 --- a/deps/README.md +++ b/deps/README.md @@ -63,6 +63,10 @@ Hiredis Hiredis is used by Sentinel, `redis-cli` and `redis-benchmark`. Like Redis, uses the SDS string library, but not necessarily the same version. In order to avoid conflicts, this version has all SDS identifiers prefixed by `hi`. +1. `git subtree pull --prefix deps/hiredis https://github.com/redis/hiredis.git --squash`
+This should hopefully merge the local changes into the new version. +2. Conflicts will arise (due to our changes); you'll need to resolve them and commit. + Linenoise --- diff --git a/deps/hiredis/.github/workflows/build.yml b/deps/hiredis/.github/workflows/build.yml index 1a1ef515318..581800b4f70 100644 --- a/deps/hiredis/.github/workflows/build.yml +++ b/deps/hiredis/.github/workflows/build.yml @@ -133,8 +133,8 @@ jobs: - name: Install dependencies run: | - brew install openssl redis@6.2 - brew link redis@6.2 --force + brew install openssl redis@7.0 + brew link redis@7.0 --force - name: Build hiredis run: USE_SSL=1 make diff --git a/deps/hiredis/.github/workflows/test.yml b/deps/hiredis/.github/workflows/test.yml index 7812af6f74c..1a2c60b795a 100644 --- a/deps/hiredis/.github/workflows/test.yml +++ b/deps/hiredis/.github/workflows/test.yml @@ -60,7 +60,7 @@ jobs: steps: - name: Install qemu if: matrix.emulator - run: sudo apt-get install -y qemu-user + run: sudo apt-get update && sudo apt-get install -y qemu-user - name: Install platform toolset if: matrix.toolset run: sudo apt-get install -y gcc-${{matrix.toolset}} diff --git a/deps/hiredis/CHANGELOG.md b/deps/hiredis/CHANGELOG.md index a2e065b2cd3..801c4072937 100644 --- a/deps/hiredis/CHANGELOG.md +++ b/deps/hiredis/CHANGELOG.md @@ -1,12 +1,63 @@ +## [1.2.0](https://github.com/redis/hiredis/tree/v1.2.0) - (2023-06-04) + +Announcing Hiredis v1.2.0 with new adapters, and a great many bug fixes. + +## 🚀 New Features + +- Add sdevent adapter @Oipo (#1144) +- Allow specifying the keepalive interval @michael-grunder (#1168) +- Add RedisModule adapter @tezc (#1182) +- Helper for setting TCP_USER_TIMEOUT socket option @zuiderkwast (#1188) + +## 🐛 Bug Fixes + +- Fix a typo in b6a052f. @yossigo (#1190) +- Fix wincrypt symbols conflict @hudayou (#1151) +- Don't attempt to set a timeout if we are in an error state. @michael-grunder (#1180) +- Accept -nan per the RESP3 spec recommendation.
@michael-grunder (#1178) +- Fix colliding option values @zuiderkwast (#1172) +- Ensure functionality without `_MSC_VER` definition @windyakin (#1194) + +## 🧰 Maintenance + +- Add a test for the TCP_USER_TIMEOUT option. @michael-grunder (#1192) +- Add -Werror as a default. @yossigo (#1193) +- CI: Update homebrew Redis version. @yossigo (#1191) +- Fix typo in makefile. @michael-grunder (#1179) +- Write a version file for the CMake package @Neverlord (#1165) +- CMakeLists.txt: respect BUILD_SHARED_LIBS @ffontaine (#1147) +- Cmake static or shared @autoantwort (#1160) +- fix typo @tillkruss (#1153) +- Add a test ensuring we don't clobber connection error. @michael-grunder (#1181) +- Search for openssl on macOS @michael-grunder (#1169) + + +## Contributors +We'd like to thank all the contributors who worked on this release! + + + + + + + + + + + + + + + ## [1.1.0](https://github.com/redis/hiredis/tree/v1.1.0) - (2022-11-15) Announcing Hiredis v1.1.0 GA with better SSL convenience, new async adapters and a great many bug fixes. -**NOTE**: Hiredis can now return `nan` in addition to `-inf` and `inf` when returning a `REDIS_REPLY_DOUBLE`. +**NOTE**: Hiredis can now return `nan` in addition to `-inf` and `inf` when returning a `REDIS_REPLY_DOUBLE`. ## 🐛 Bug Fixes -- Add support for nan in RESP3 double [@filipecosta90](https://github.com/filipecosta90) +- Add support for nan in RESP3 double [@filipecosta90](https://github.com/filipecosta90) ([\#1133](https://github.com/redis/hiredis/pull/1133)) ## 🧰 Maintenance @@ -14,7 +65,7 @@ Announcing Hiredis v1.1.0 GA with better SSL convenience, new async adapters and - Add an example that calls redisCommandArgv [@michael-grunder](https://github.com/michael-grunder) ([\#1140](https://github.com/redis/hiredis/pull/1140)) - fix flag reference [@pata00](https://github.com/pata00) ([\#1136](https://github.com/redis/hiredis/pull/1136)) -- Make freeing a NULL redisAsyncContext a no op. 
[@michael-grunder](https://github.com/michael-grunder) +- Make freeing a NULL redisAsyncContext a no op. [@michael-grunder](https://github.com/michael-grunder) ([\#1135](https://github.com/redis/hiredis/pull/1135)) - CI updates ([@bjosv](https://github.com/redis/bjosv) ([\#1139](https://github.com/redis/hiredis/pull/1139)) diff --git a/deps/hiredis/Makefile b/deps/hiredis/Makefile index f31293e90c2..bd2106b1d12 100644 --- a/deps/hiredis/Makefile +++ b/deps/hiredis/Makefile @@ -39,7 +39,7 @@ export REDIS_TEST_CONFIG CC:=$(shell sh -c 'type $${CC%% *} >/dev/null 2>/dev/null && echo $(CC) || echo gcc') CXX:=$(shell sh -c 'type $${CXX%% *} >/dev/null 2>/dev/null && echo $(CXX) || echo g++') OPTIMIZATION?=-O3 -WARNINGS=-Wall -W -Wstrict-prototypes -Wwrite-strings -Wno-missing-field-initializers +WARNINGS=-Wall -Wextra -Werror -Wstrict-prototypes -Wwrite-strings -Wno-missing-field-initializers DEBUG_FLAGS?= -g -ggdb REAL_CFLAGS=$(OPTIMIZATION) -fPIC $(CPPFLAGS) $(CFLAGS) $(WARNINGS) $(DEBUG_FLAGS) $(PLATFORM_FLAGS) REAL_LDFLAGS=$(LDFLAGS) @@ -311,7 +311,7 @@ install: $(DYLIBNAME) $(STLIBNAME) $(PKGCONFNAME) $(SSL_INSTALL) $(INSTALL) hiredis.h async.h read.h sds.h alloc.h sockcompat.h $(INSTALL_INCLUDE_PATH) $(INSTALL) adapters/*.h $(INSTALL_INCLUDE_PATH)/adapters $(INSTALL) $(DYLIBNAME) $(INSTALL_LIBRARY_PATH)/$(DYLIB_MINOR_NAME) - cd $(INSTALL_LIBRARY_PATH) && ln -sf $(DYLIB_MINOR_NAME) $(DYLIBNAME) + cd $(INSTALL_LIBRARY_PATH) && ln -sf $(DYLIB_MINOR_NAME) $(DYLIBNAME) && ln -sf $(DYLIB_MINOR_NAME) $(DYLIB_MAJOR_NAME) $(INSTALL) $(STLIBNAME) $(INSTALL_LIBRARY_PATH) mkdir -p $(INSTALL_PKGCONF_PATH) $(INSTALL) $(PKGCONFNAME) $(INSTALL_PKGCONF_PATH) @@ -320,7 +320,7 @@ install-ssl: $(SSL_DYLIBNAME) $(SSL_STLIBNAME) $(SSL_PKGCONFNAME) mkdir -p $(INSTALL_INCLUDE_PATH) $(INSTALL_LIBRARY_PATH) $(INSTALL) hiredis_ssl.h $(INSTALL_INCLUDE_PATH) $(INSTALL) $(SSL_DYLIBNAME) $(INSTALL_LIBRARY_PATH)/$(SSL_DYLIB_MINOR_NAME) - cd $(INSTALL_LIBRARY_PATH) && ln -sf 
$(SSL_DYLIB_MINOR_NAME) $(SSL_DYLIBNAME) + cd $(INSTALL_LIBRARY_PATH) && ln -sf $(SSL_DYLIB_MINOR_NAME) $(SSL_DYLIBNAME) && ln -sf $(SSL_DYLIB_MINOR_NAME) $(SSL_DYLIB_MAJOR_NAME) $(INSTALL) $(SSL_STLIBNAME) $(INSTALL_LIBRARY_PATH) mkdir -p $(INSTALL_PKGCONF_PATH) $(INSTALL) $(SSL_PKGCONFNAME) $(INSTALL_PKGCONF_PATH) diff --git a/deps/hiredis/hiredis.c b/deps/hiredis/hiredis.c index fd200173f2d..8012035a05c 100644 --- a/deps/hiredis/hiredis.c +++ b/deps/hiredis/hiredis.c @@ -392,12 +392,12 @@ int redisvFormatCommand(char **target, const char *format, va_list ap) { while (*_p != '\0' && strchr(flags,*_p) != NULL) _p++; /* Field width */ - while (*_p != '\0' && isdigit(*_p)) _p++; + while (*_p != '\0' && isdigit((int) *_p)) _p++; /* Precision */ if (*_p == '.') { _p++; - while (*_p != '\0' && isdigit(*_p)) _p++; + while (*_p != '\0' && isdigit((int) *_p)) _p++; } /* Copy va_list before consuming with va_arg */ diff --git a/deps/hiredis/hiredis.h b/deps/hiredis/hiredis.h index 044a344e00e..635988b7e1c 100644 --- a/deps/hiredis/hiredis.h +++ b/deps/hiredis/hiredis.h @@ -46,9 +46,9 @@ typedef long long ssize_t; #include "alloc.h" /* for allocation wrappers */ #define HIREDIS_MAJOR 1 -#define HIREDIS_MINOR 1 -#define HIREDIS_PATCH 1 -#define HIREDIS_SONAME 1.1.1-dev +#define HIREDIS_MINOR 2 +#define HIREDIS_PATCH 0 +#define HIREDIS_SONAME 1.1.0 /* Connection type can be blocking or non-blocking and is set in the * least significant bit of the flags field in redisContext. 
*/ diff --git a/deps/hiredis/net.c b/deps/hiredis/net.c index d75a966580d..33fe0b94f4a 100644 --- a/deps/hiredis/net.c +++ b/deps/hiredis/net.c @@ -234,6 +234,7 @@ int redisContextSetTcpUserTimeout(redisContext *c, unsigned int timeout) { res = setsockopt(c->fd, IPPROTO_TCP, TCP_USER_TIMEOUT, &timeout, sizeof(timeout)); #else res = -1; + errno = ENOTSUP; (void)timeout; #endif if (res == -1) { diff --git a/deps/hiredis/sds.c b/deps/hiredis/sds.c index f99962eb668..ac2b483525d 100644 --- a/deps/hiredis/sds.c +++ b/deps/hiredis/sds.c @@ -948,7 +948,7 @@ hisds *hi_sdssplitargs(const char *line, int *argc) { *argc = 0; while(1) { /* skip blanks */ - while(*p && isspace(*p)) p++; + while(*p && isspace((int) *p)) p++; if (*p) { /* get a token */ int inq=0; /* set to 1 if we are in "quotes" */ @@ -959,8 +959,8 @@ hisds *hi_sdssplitargs(const char *line, int *argc) { while(!done) { if (inq) { if (*p == '\\' && *(p+1) == 'x' && - isxdigit(*(p+2)) && - isxdigit(*(p+3))) + isxdigit((int) *(p+2)) && + isxdigit((int) *(p+3))) { unsigned char byte; @@ -984,7 +984,7 @@ hisds *hi_sdssplitargs(const char *line, int *argc) { } else if (*p == '"') { /* closing quote must be followed by a space or * nothing at all. */ - if (*(p+1) && !isspace(*(p+1))) goto err; + if (*(p+1) && !isspace((int) *(p+1))) goto err; done=1; } else if (!*p) { /* unterminated quotes */ @@ -999,7 +999,7 @@ hisds *hi_sdssplitargs(const char *line, int *argc) { } else if (*p == '\'') { /* closing quote must be followed by a space or * nothing at all. 
*/ - if (*(p+1) && !isspace(*(p+1))) goto err; + if (*(p+1) && !isspace((int) *(p+1))) goto err; done=1; } else if (!*p) { /* unterminated quotes */ diff --git a/deps/hiredis/ssl.c b/deps/hiredis/ssl.c index 1431803ad96..9ab18cc0e52 100644 --- a/deps/hiredis/ssl.c +++ b/deps/hiredis/ssl.c @@ -59,6 +59,8 @@ #include "async_private.h" #include "hiredis_ssl.h" +#define OPENSSL_1_1_0 0x10100000L + void __redisSetError(redisContext *c, int type, const char *str); struct redisSSLContext { @@ -100,7 +102,7 @@ redisContextFuncs redisContextSSLFuncs; * Note that this is only required for OpenSSL < 1.1.0. */ -#if OPENSSL_VERSION_NUMBER < 0x10100000L +#if OPENSSL_VERSION_NUMBER < OPENSSL_1_1_0 #define HIREDIS_USE_CRYPTO_LOCKS #endif @@ -256,13 +258,25 @@ redisSSLContext *redisCreateSSLContextWithOptions(redisSSLOptions *options, redi if (ctx == NULL) goto error; - ctx->ssl_ctx = SSL_CTX_new(SSLv23_client_method()); + const SSL_METHOD *ssl_method; +#if OPENSSL_VERSION_NUMBER >= OPENSSL_1_1_0 + ssl_method = TLS_client_method(); +#else + ssl_method = SSLv23_client_method(); +#endif + + ctx->ssl_ctx = SSL_CTX_new(ssl_method); if (!ctx->ssl_ctx) { if (error) *error = REDIS_SSL_CTX_CREATE_FAILED; goto error; } - SSL_CTX_set_options(ctx->ssl_ctx, SSL_OP_NO_SSLv2 | SSL_OP_NO_SSLv3); +#if OPENSSL_VERSION_NUMBER >= OPENSSL_1_1_0 + SSL_CTX_set_min_proto_version(ctx->ssl_ctx, TLS1_2_VERSION); +#else + SSL_CTX_set_options(ctx->ssl_ctx, SSL_OP_NO_SSLv2 | SSL_OP_NO_SSLv3 | SSL_OP_NO_TLSv1 | SSL_OP_NO_TLSv1_1); +#endif + SSL_CTX_set_verify(ctx->ssl_ctx, options->verify_mode, NULL); if ((cert_filename != NULL && private_key_filename == NULL) || diff --git a/deps/hiredis/test.c b/deps/hiredis/test.c index ecfe075b7d1..f47e9ef2a2a 100644 --- a/deps/hiredis/test.c +++ b/deps/hiredis/test.c @@ -78,7 +78,7 @@ static int tests = 0, fails = 0, skips = 0; static void millisleep(int ms) { -#if _MSC_VER +#ifdef _MSC_VER Sleep(ms); #else usleep(ms*1000); @@ -409,10 +409,19 @@ static void 
test_tcp_options(struct config cfg) { redisContext *c; c = do_connect(cfg); + test("We can enable TCP_KEEPALIVE: "); test_cond(redisEnableKeepAlive(c) == REDIS_OK); - disconnect(c, 0); +#ifdef TCP_USER_TIMEOUT + test("We can set TCP_USER_TIMEOUT: "); + test_cond(redisSetTcpUserTimeout(c, 100) == REDIS_OK); +#else + test("Setting TCP_USER_TIMEOUT errors when unsupported: "); + test_cond(redisSetTcpUserTimeout(c, 100) == REDIS_ERR && c->err == REDIS_ERR_IO); +#endif + + redisFree(c); } static void test_reply_reader(void) { @@ -1567,6 +1576,9 @@ static void test_throughput(struct config config) { // } #ifdef HIREDIS_TEST_ASYNC + +#pragma GCC diagnostic ignored "-Woverlength-strings" /* required on gcc 4.8.x due to assert statements */ + struct event_base *base; typedef struct TestState { diff --git a/deps/linenoise/README.markdown b/deps/linenoise/README.markdown index 1afea2ae65c..b3752da162b 100644 --- a/deps/linenoise/README.markdown +++ b/deps/linenoise/README.markdown @@ -108,7 +108,7 @@ to search and re-edit already inserted lines of text. The followings are the history API calls: - int linenoiseHistoryAdd(const char *line); + int linenoiseHistoryAdd(const char *line, int is_sensitive); int linenoiseHistorySetMaxLen(int len); int linenoiseHistorySave(const char *filename); int linenoiseHistoryLoad(const char *filename); diff --git a/deps/linenoise/linenoise.c b/deps/linenoise/linenoise.c index dd86abe86e2..30d19a9152b 100644 --- a/deps/linenoise/linenoise.c +++ b/deps/linenoise/linenoise.c @@ -117,6 +117,7 @@ #include #include #include +#include #include "linenoise.h" #define LINENOISE_DEFAULT_HISTORY_MAX_LEN 100 @@ -134,6 +135,18 @@ static int atexit_registered = 0; /* Register atexit just 1 time. */ static int history_max_len = LINENOISE_DEFAULT_HISTORY_MAX_LEN; static int history_len = 0; static char **history = NULL; +static int *history_sensitive = NULL; /* An array records whether each line in + * history is sensitive. 
*/ + +static int reverse_search_mode_enabled = 0; +static int reverse_search_direction = 0; /* 1 means forward, -1 means backward. */ +static int cycle_to_next_search = 0; /* indicates whether to continue the search with CTRL+S or CTRL+R. */ +static char search_result[LINENOISE_MAX_LINE]; +static char search_result_friendly[LINENOISE_MAX_LINE]; +static int search_result_history_index = 0; +static int search_result_start_offset = 0; +static int ignore_once_hint = 0; /* Flag to ignore hint once, preventing it from interfering + * with search results right after exiting search mode. */ /* The linenoiseState structure represents the state during line editing. * We pass this state to functions implementing specific editing @@ -143,6 +156,7 @@ struct linenoiseState { int ofd; /* Terminal stdout file descriptor. */ char *buf; /* Edited line buffer. */ size_t buflen; /* Edited line buffer size. */ + const char *origin_prompt; /* Original prompt, used to restore when exiting search mode. */ const char *prompt; /* Prompt to display. */ size_t plen; /* Prompt length. */ size_t pos; /* Current cursor position. */ @@ -153,6 +167,13 @@ struct linenoiseState { int history_index; /* The history index we are currently editing. */ }; +typedef struct { + int len; /* Length of the result string. */ + char *result; /* Search result string. */ + int search_term_index; /* Position of the search term in the history record. */ + int search_term_len; /* Length of the search term. 
*/ +} linenoiseHistorySearchResult; + enum KEY_ACTION{ KEY_NULL = 0, /* NULL */ CTRL_A = 1, /* Ctrl+a */ @@ -161,6 +182,7 @@ enum KEY_ACTION{ CTRL_D = 4, /* Ctrl-d */ CTRL_E = 5, /* Ctrl-e */ CTRL_F = 6, /* Ctrl-f */ + CTRL_G = 7, /* Ctrl-g */ CTRL_H = 8, /* Ctrl-h */ TAB = 9, /* Tab */ NL = 10, /* Enter typed before raw mode was enabled */ @@ -169,6 +191,8 @@ enum KEY_ACTION{ ENTER = 13, /* Enter */ CTRL_N = 14, /* Ctrl-n */ CTRL_P = 16, /* Ctrl-p */ + CTRL_R = 18, /* Ctrl-r */ + CTRL_S = 19, /* Ctrl-s */ CTRL_T = 20, /* Ctrl-t */ CTRL_U = 21, /* Ctrl+u */ CTRL_W = 23, /* Ctrl+w */ @@ -177,8 +201,14 @@ enum KEY_ACTION{ }; static void linenoiseAtExit(void); -int linenoiseHistoryAdd(const char *line); +int linenoiseHistoryAdd(const char *line, int is_sensitive); static void refreshLine(struct linenoiseState *l); +static void refreshSearchResult(struct linenoiseState *ls); + +static inline void resetSearchResult(void) { + memset(search_result, 0, sizeof(search_result)); + memset(search_result_friendly, 0, sizeof(search_result_friendly)); +} /* Debugging macro. */ #if 0 @@ -219,6 +249,41 @@ void linenoiseSetMultiLine(int ml) { mlmode = ml; } +#define REVERSE_SEARCH_PROMPT(direction) ((direction) == -1 ? "(reverse-i-search): " : "(i-search): ") + +/* Enables the reverse search mode and refreshes the prompt. */ +static void enableReverseSearchMode(struct linenoiseState *l) { + assert(reverse_search_mode_enabled != 1); + reverse_search_mode_enabled = 1; + l->origin_prompt = l->prompt; + l->prompt = REVERSE_SEARCH_PROMPT(reverse_search_direction); + refreshLine(l); +} + +/* This function disables the reverse search mode and returns the terminal to its original state. + * If the 'discard' parameter is true, it discards the user's input search keyword and search result. + * Otherwise, it copies the search result into 'buf', If there is no search result, it copies the + * input search keyword instead. 
*/ +static void disableReverseSearchMode(struct linenoiseState *l, char *buf, size_t buflen, int discard) { + if (discard) { + buf[0] = '\0'; + l->pos = l->len = 0; + } else { + ignore_once_hint = 1; + if (strlen(search_result)) { + strncpy(buf, search_result, buflen); + buf[buflen-1] = '\0'; + l->pos = l->len = strlen(buf); + } + } + + /* Reset the state to non-search state. */ + reverse_search_mode_enabled = 0; + l->prompt = l->origin_prompt; + resetSearchResult(); + refreshLine(l); +} + /* Return true if the terminal name is in the list of terminals we know are * not able to understand basic escape sequences. */ static int isUnsupportedTerm(void) { @@ -233,6 +298,10 @@ static int isUnsupportedTerm(void) { /* Raw mode: 1960 magic shit. */ static int enableRawMode(int fd) { + if (getenv("FAKETTY_WITH_PROMPT") != NULL) { + return 0; + } + struct termios raw; if (!isatty(STDIN_FILENO)) goto fatal; @@ -301,6 +370,9 @@ static int getCursorPosition(int ifd, int ofd) { /* Try to get the number of columns in the current terminal, or assume 80 * if it fails. */ static int getColumns(int ifd, int ofd) { + if (getenv("FAKETTY_WITH_PROMPT") != NULL) { + goto failed; + } struct winsize ws; if (ioctl(1, TIOCGWINSZ, &ws) == -1 || ws.ws_col == 0) { @@ -492,6 +564,13 @@ static void abFree(struct abuf *ab) { * to the right of the prompt. */ void refreshShowHints(struct abuf *ab, struct linenoiseState *l, int plen) { char seq[64]; + + /* Show hits when not in reverse search mode and not instructed to ignore once. 
*/ + if (reverse_search_mode_enabled || ignore_once_hint) { + ignore_once_hint = 0; + return; + } + if (hintsCallback && plen+l->len < l->cols) { int color = -1, bold = 0; char *hint = hintsCallback(l->buf,&color,&bold); @@ -604,7 +683,12 @@ static void refreshMultiLine(struct linenoiseState *l) { unsigned int i; for (i = 0; i < l->len; i++) abAppend(&ab,"*",1); } else { - abAppend(&ab,l->buf,l->len); + refreshSearchResult(l); + if (strlen(search_result) > 0) { + abAppend(&ab, search_result_friendly, strlen(search_result_friendly)); + } else { + abAppend(&ab,l->buf,l->len); + } } /* Show hits if any. */ @@ -637,6 +721,9 @@ static void refreshMultiLine(struct linenoiseState *l) { /* Set column. */ col = (plen+(int)l->pos) % (int)l->cols; + if (strlen(search_result) > 0) { + col += search_result_start_offset; + } lndebug("set col %d", 1+col); if (col) snprintf(seq,64,"\r\x1b[%dC", col); @@ -818,7 +905,7 @@ static int linenoiseEdit(int stdin_fd, int stdout_fd, char *buf, size_t buflen, /* The latest history entry is always our current buffer, that * initially is just an empty string. */ - linenoiseHistoryAdd(""); + linenoiseHistoryAdd("", 0); if (write(l.ofd,prompt,l.plen) == -1) return -1; while(1) { @@ -832,7 +919,7 @@ static int linenoiseEdit(int stdin_fd, int stdout_fd, char *buf, size_t buflen, /* Only autocomplete when the callback is set. It returns < 0 when * there was an error reading from fd. Otherwise it will return the * character that should be handled next. 
*/ - if (c == 9 && completionCallback != NULL) { + if (c == TAB && completionCallback != NULL && !reverse_search_mode_enabled) { c = completeLine(&l); /* Return on errors */ if (c < 0) return l.len; @@ -843,6 +930,9 @@ static int linenoiseEdit(int stdin_fd, int stdout_fd, char *buf, size_t buflen, switch(c) { case NL: /* enter, typed before raw mode was enabled */ break; + case TAB: + if (reverse_search_mode_enabled) disableReverseSearchMode(&l, buf, buflen, 0); + break; case ENTER: /* enter */ history_len--; free(history[history_len]); @@ -855,8 +945,14 @@ static int linenoiseEdit(int stdin_fd, int stdout_fd, char *buf, size_t buflen, refreshLine(&l); hintsCallback = hc; } + + if (reverse_search_mode_enabled) disableReverseSearchMode(&l, buf, buflen, 0); return (int)l.len; case CTRL_C: /* ctrl-c */ + if (reverse_search_mode_enabled) { + disableReverseSearchMode(&l, buf, buflen, 1); + break; + } errno = EAGAIN; return -1; case BACKSPACE: /* backspace */ @@ -891,6 +987,23 @@ static int linenoiseEdit(int stdin_fd, int stdout_fd, char *buf, size_t buflen, case CTRL_P: /* ctrl-p */ linenoiseEditHistoryNext(&l, LINENOISE_HISTORY_PREV); break; + case CTRL_R: + case CTRL_S: + reverse_search_direction = c == CTRL_R ? -1 : 1; + if (reverse_search_mode_enabled) { + /* cycle search results */ + cycle_to_next_search = 1; + l.prompt = REVERSE_SEARCH_PROMPT(reverse_search_direction); + refreshLine(&l); + break; + } + buf[0] = '\0'; + l.pos = l.len = 0; + enableReverseSearchMode(&l); + break; + case CTRL_G: + if (reverse_search_mode_enabled) disableReverseSearchMode(&l, buf, buflen, 1); + break; case CTRL_N: /* ctrl-n */ linenoiseEditHistoryNext(&l, LINENOISE_HISTORY_NEXT); break; @@ -901,6 +1014,11 @@ static int linenoiseEdit(int stdin_fd, int stdout_fd, char *buf, size_t buflen, if (read(l.ifd,seq,1) == -1) break; if (read(l.ifd,seq+1,1) == -1) break; + if (reverse_search_mode_enabled) { + disableReverseSearchMode(&l, buf, buflen, 1); + break; + } + /* ESC [ sequences. 
*/ if (seq[0] == '[') { if (seq[1] >= '0' && seq[1] <= '9') { @@ -1067,14 +1185,14 @@ static char *linenoiseNoTTY(void) { * editing function or uses dummy fgets() so that you will be able to type * something even in the most desperate of the conditions. */ char *linenoise(const char *prompt) { - char buf[LINENOISE_MAX_LINE]; + char buf[LINENOISE_MAX_LINE] = {0}; int count; - if (!isatty(STDIN_FILENO)) { + if (getenv("FAKETTY_WITH_PROMPT") == NULL && !isatty(STDIN_FILENO)) { /* Not a tty: read from file / pipe. In this mode we don't want any * limit to the line size, so we call a function to handle that. */ return linenoiseNoTTY(); - } else if (isUnsupportedTerm()) { + } else if (getenv("FAKETTY_WITH_PROMPT") == NULL && isUnsupportedTerm()) { size_t len; printf("%s",prompt); @@ -1112,6 +1230,7 @@ static void freeHistory(void) { for (j = 0; j < history_len; j++) free(history[j]); free(history); + free(history_sensitive); } } @@ -1128,7 +1247,7 @@ static void linenoiseAtExit(void) { * histories, but will work well for a few hundred of entries. * * Using a circular buffer is smarter, but a bit more complex to handle. */ -int linenoiseHistoryAdd(const char *line) { +int linenoiseHistoryAdd(const char *line, int is_sensitive) { char *linecopy; if (history_max_len == 0) return 0; @@ -1137,7 +1256,14 @@ int linenoiseHistoryAdd(const char *line) { if (history == NULL) { history = malloc(sizeof(char*)*history_max_len); if (history == NULL) return 0; + history_sensitive = malloc(sizeof(int)*history_max_len); + if (history_sensitive == NULL) { + free(history); + history = NULL; + return 0; + } memset(history,0,(sizeof(char*)*history_max_len)); + memset(history_sensitive,0,(sizeof(int)*history_max_len)); } /* Don't add duplicated lines. 
*/ @@ -1150,9 +1276,11 @@ int linenoiseHistoryAdd(const char *line) { if (history_len == history_max_len) { free(history[0]); memmove(history,history+1,sizeof(char*)*(history_max_len-1)); + memmove(history_sensitive,history_sensitive+1,sizeof(int)*(history_max_len-1)); history_len--; } history[history_len] = linecopy; + history_sensitive[history_len] = is_sensitive; history_len++; return 1; } @@ -1163,6 +1291,7 @@ int linenoiseHistoryAdd(const char *line) { * than the amount of items already inside the history. */ int linenoiseHistorySetMaxLen(int len) { char **new; + int *new_sensitive; if (len < 1) return 0; if (history) { @@ -1170,6 +1299,11 @@ int linenoiseHistorySetMaxLen(int len) { new = malloc(sizeof(char*)*len); if (new == NULL) return 0; + new_sensitive = malloc(sizeof(int)*len); + if (new_sensitive == NULL) { + free(new); + return 0; + } /* If we can't copy everything, free the elements we'll not use. */ if (len < tocopy) { @@ -1179,9 +1313,13 @@ int linenoiseHistorySetMaxLen(int len) { tocopy = len; } memset(new,0,sizeof(char*)*len); + memset(new_sensitive,0,sizeof(int)*len); memcpy(new,history+(history_len-tocopy), sizeof(char*)*tocopy); + memcpy(new_sensitive,history_sensitive+(history_len-tocopy), sizeof(int)*tocopy); free(history); + free(history_sensitive); history = new; + history_sensitive = new_sensitive; } history_max_len = len; if (history_len > history_max_len) @@ -1201,7 +1339,7 @@ int linenoiseHistorySave(const char *filename) { if (fp == NULL) return -1; fchmod(fileno(fp),S_IRUSR|S_IWUSR); for (j = 0; j < history_len; j++) - fprintf(fp,"%s\n",history[j]); + if (!history_sensitive[j]) fprintf(fp,"%s\n",history[j]); fclose(fp); return 0; } @@ -1223,8 +1361,97 @@ int linenoiseHistoryLoad(const char *filename) { p = strchr(buf,'\r'); if (!p) p = strchr(buf,'\n'); if (p) *p = '\0'; - linenoiseHistoryAdd(buf); + linenoiseHistoryAdd(buf, 0); } fclose(fp); return 0; } + +/* This function updates the search index based on the direction of the 
search. + * Returns 0 if the beginning or end of the history is reached, otherwise, returns 1. */ +static int setNextSearchIndex(int *i) { + if (reverse_search_direction == 1) { + if (*i == history_len-1) return 0; + *i = *i + 1; + } else { + if (*i <= 0) return 0; + *i = *i - 1; + } + return 1; +} + +linenoiseHistorySearchResult searchInHistory(char *search_term) { + linenoiseHistorySearchResult result = {0}; + + if (!history_len || !strlen(search_term)) return result; + + int i = cycle_to_next_search ? search_result_history_index : + (reverse_search_direction == -1 ? history_len-1 : 0); + + while (1) { + char *found = strstr(history[i], search_term); + + /* check if we found the same string at another index when cycling, this would be annoying to cycle through + * as it might appear that cycling isn't working */ + int strings_are_the_same = cycle_to_next_search && strcmp(history[i], history[search_result_history_index]) == 0; + + if (found && !strings_are_the_same) { + int haystack_index = found - history[i]; + result.result = history[i]; + result.len = strlen(history[i]); + result.search_term_index = haystack_index; + result.search_term_len = strlen(search_term); + search_result_history_index = i; + break; + } + + /* Exit if reached the end. */ + if (!setNextSearchIndex(&i)) break; + } + + return result; +} + +static void refreshSearchResult(struct linenoiseState *ls) { + if (!reverse_search_mode_enabled) { + return; + } + + linenoiseHistorySearchResult sr = searchInHistory(ls->buf); + int found = sr.result && sr.len; + + /* If the search term has not changed and we are cycling to the next search result + * (using CTRL+R or CTRL+S), there is no need to reset the old search result. 
*/ + if (!cycle_to_next_search || found) + resetSearchResult(); + cycle_to_next_search = 0; + + if (found) { + char *bold = "\x1B[1m"; + char *normal = "\x1B[0m"; + + int size_needed = sr.search_term_index + sr.search_term_len + sr.len - + (sr.search_term_index+sr.search_term_len) + sizeof(normal) + sizeof(bold) + sizeof(normal); + if (size_needed > sizeof(search_result_friendly) - 1) { + return; + } + + /* Allocate memory for the prefix, match, and suffix strings, one extra byte for `\0`. */ + char *prefix = calloc(sizeof(char), sr.search_term_index + 1); + char *match = calloc(sizeof(char), sr.search_term_len + 1); + char *suffix = calloc(sizeof(char), sr.len - (sr.search_term_index+sr.search_term_len) + 1); + + memcpy(prefix, sr.result, sr.search_term_index); + memcpy(match, sr.result + sr.search_term_index, sr.search_term_len); + memcpy(suffix, sr.result + sr.search_term_index + sr.search_term_len, + sr.len - (sr.search_term_index+sr.search_term_len)); + sprintf(search_result, "%s%s%s", prefix, match, suffix); + sprintf(search_result_friendly, "%s%s%s%s%s%s", normal, prefix, bold, match, normal, suffix); + + free(prefix); + free(match); + free(suffix); + + search_result_start_offset = sr.search_term_index; + } +} diff --git a/deps/linenoise/linenoise.h b/deps/linenoise/linenoise.h index 6dfee73bcd4..beac6df467a 100644 --- a/deps/linenoise/linenoise.h +++ b/deps/linenoise/linenoise.h @@ -58,7 +58,7 @@ void linenoiseAddCompletion(linenoiseCompletions *, const char *); char *linenoise(const char *prompt); void linenoiseFree(void *ptr); -int linenoiseHistoryAdd(const char *line); +int linenoiseHistoryAdd(const char *line, int is_sensitive); int linenoiseHistorySetMaxLen(int len); int linenoiseHistorySave(const char *filename); int linenoiseHistoryLoad(const char *filename); diff --git a/deps/lua/src/loslib.c b/deps/lua/src/loslib.c index da06a572acf..403f41634a1 100644 --- a/deps/lua/src/loslib.c +++ b/deps/lua/src/loslib.c @@ -234,10 +234,17 @@ static const 
luaL_Reg syslib[] = { /* }====================================================== */ +#define UNUSED(V) ((void) V) +/* Only a subset is loaded currently, for sandboxing concerns. */ +static const luaL_Reg sandbox_syslib[] = { + {"clock", os_clock}, + {NULL, NULL} +}; LUALIB_API int luaopen_os (lua_State *L) { - luaL_register(L, LUA_OSLIBNAME, syslib); + UNUSED(syslib); + luaL_register(L, LUA_OSLIBNAME, sandbox_syslib); return 1; } diff --git a/deps/lua/src/lua_cjson.c b/deps/lua/src/lua_cjson.c index c26c0d7b8ea..b86d73e97cf 100644 --- a/deps/lua/src/lua_cjson.c +++ b/deps/lua/src/lua_cjson.c @@ -39,6 +39,7 @@ #include #include #include +#include #include #include "lua.h" #include "lauxlib.h" @@ -141,13 +142,13 @@ typedef struct { typedef struct { json_token_type_t type; - int index; + size_t index; union { const char *string; double number; int boolean; } value; - int string_len; + size_t string_len; } json_token_t; static const char *char2escape[256] = { @@ -463,9 +464,8 @@ static void json_encode_exception(lua_State *l, json_config_t *cfg, strbuf_t *js static void json_append_string(lua_State *l, strbuf_t *json, int lindex) { const char *escstr; - int i; const char *str; - size_t len; + size_t i, len; str = lua_tolstring(l, lindex, &len); @@ -473,6 +473,8 @@ static void json_append_string(lua_State *l, strbuf_t *json, int lindex) * This buffer is reused constantly for small strings * If there are any excess pages, they won't be hit anyway. * This gains ~5% speedup. 
*/ + if (len > SIZE_MAX / 6 - 3) + abort(); /* Overflow check */ strbuf_ensure_empty_length(json, len * 6 + 2); strbuf_append_char_unsafe(json, '\"'); @@ -706,7 +708,7 @@ static int json_encode(lua_State *l) strbuf_t local_encode_buf; strbuf_t *encode_buf; char *json; - int len; + size_t len; luaL_argcheck(l, lua_gettop(l) == 1, 1, "expected 1 argument"); diff --git a/deps/lua/src/lua_cmsgpack.c b/deps/lua/src/lua_cmsgpack.c index 49c6dc7b0ce..5f8929d454d 100644 --- a/deps/lua/src/lua_cmsgpack.c +++ b/deps/lua/src/lua_cmsgpack.c @@ -117,7 +117,9 @@ mp_buf *mp_buf_new(lua_State *L) { void mp_buf_append(lua_State *L, mp_buf *buf, const unsigned char *s, size_t len) { if (buf->free < len) { - size_t newsize = (buf->len+len)*2; + size_t newsize = buf->len+len; + if (newsize < buf->len || newsize >= SIZE_MAX/2) abort(); + newsize *= 2; buf->b = (unsigned char*)mp_realloc(L, buf->b, buf->len + buf->free, newsize); buf->free = newsize - buf->len; @@ -173,7 +175,7 @@ void mp_cur_init(mp_cur *cursor, const unsigned char *s, size_t len) { void mp_encode_bytes(lua_State *L, mp_buf *buf, const unsigned char *s, size_t len) { unsigned char hdr[5]; - int hdrlen; + size_t hdrlen; if (len < 32) { hdr[0] = 0xa0 | (len&0xff); /* fix raw */ @@ -220,7 +222,7 @@ void mp_encode_double(lua_State *L, mp_buf *buf, double d) { void mp_encode_int(lua_State *L, mp_buf *buf, int64_t n) { unsigned char b[9]; - int enclen; + size_t enclen; if (n >= 0) { if (n <= 127) { @@ -290,9 +292,9 @@ void mp_encode_int(lua_State *L, mp_buf *buf, int64_t n) { mp_buf_append(L,buf,b,enclen); } -void mp_encode_array(lua_State *L, mp_buf *buf, int64_t n) { +void mp_encode_array(lua_State *L, mp_buf *buf, uint64_t n) { unsigned char b[5]; - int enclen; + size_t enclen; if (n <= 15) { b[0] = 0x90 | (n & 0xf); /* fix array */ @@ -313,7 +315,7 @@ void mp_encode_array(lua_State *L, mp_buf *buf, int64_t n) { mp_buf_append(L,buf,b,enclen); } -void mp_encode_map(lua_State *L, mp_buf *buf, int64_t n) { +void 
mp_encode_map(lua_State *L, mp_buf *buf, uint64_t n) { unsigned char b[5]; int enclen; @@ -791,7 +793,7 @@ void mp_decode_to_lua_type(lua_State *L, mp_cur *c) { } } -int mp_unpack_full(lua_State *L, int limit, int offset) { +int mp_unpack_full(lua_State *L, lua_Integer limit, lua_Integer offset) { size_t len; const char *s; mp_cur c; @@ -803,10 +805,10 @@ int mp_unpack_full(lua_State *L, int limit, int offset) { if (offset < 0 || limit < 0) /* requesting negative off or lim is invalid */ return luaL_error(L, "Invalid request to unpack with offset of %d and limit of %d.", - offset, len); + (int) offset, (int) len); else if (offset > len) return luaL_error(L, - "Start offset %d greater than input length %d.", offset, len); + "Start offset %d greater than input length %d.", (int) offset, (int) len); if (decode_all) limit = INT_MAX; @@ -828,12 +830,13 @@ int mp_unpack_full(lua_State *L, int limit, int offset) { /* c->left is the remaining size of the input buffer. * subtract the entire buffer size from the unprocessed size * to get our next start offset */ - int offset = len - c.left; + size_t new_offset = len - c.left; + if (new_offset > LONG_MAX) abort(); luaL_checkstack(L, 1, "in function mp_unpack_full"); /* Return offset -1 when we have have processed the entire buffer. */ - lua_pushinteger(L, c.left == 0 ? -1 : offset); + lua_pushinteger(L, c.left == 0 ? -1 : (lua_Integer) new_offset); /* Results are returned with the arg elements still * in place. Lua takes care of only returning * elements above the args for us. 
@@ -852,15 +855,15 @@ int mp_unpack(lua_State *L) { } int mp_unpack_one(lua_State *L) { - int offset = luaL_optinteger(L, 2, 0); + lua_Integer offset = luaL_optinteger(L, 2, 0); /* Variable pop because offset may not exist */ lua_pop(L, lua_gettop(L)-1); return mp_unpack_full(L, 1, offset); } int mp_unpack_limit(lua_State *L) { - int limit = luaL_checkinteger(L, 2); - int offset = luaL_optinteger(L, 3, 0); + lua_Integer limit = luaL_checkinteger(L, 2); + lua_Integer offset = luaL_optinteger(L, 3, 0); /* Variable pop because offset may not exist */ lua_pop(L, lua_gettop(L)-1); diff --git a/deps/lua/src/strbuf.c b/deps/lua/src/strbuf.c index f0f7f4b9a36..97ee940c900 100644 --- a/deps/lua/src/strbuf.c +++ b/deps/lua/src/strbuf.c @@ -26,6 +26,7 @@ #include #include #include +#include #include "strbuf.h" @@ -38,22 +39,22 @@ static void die(const char *fmt, ...) va_end(arg); fprintf(stderr, "\n"); - exit(-1); + abort(); } -void strbuf_init(strbuf_t *s, int len) +void strbuf_init(strbuf_t *s, size_t len) { - int size; + size_t size; - if (len <= 0) + if (!len) size = STRBUF_DEFAULT_SIZE; else - size = len + 1; /* \0 terminator */ - + size = len + 1; + if (size < len) + die("Overflow, len: %zu", len); s->buf = NULL; s->size = size; s->length = 0; - s->increment = STRBUF_DEFAULT_INCREMENT; s->dynamic = 0; s->reallocs = 0; s->debug = 0; @@ -65,7 +66,7 @@ void strbuf_init(strbuf_t *s, int len) strbuf_ensure_null(s); } -strbuf_t *strbuf_new(int len) +strbuf_t *strbuf_new(size_t len) { strbuf_t *s; @@ -81,20 +82,10 @@ strbuf_t *strbuf_new(int len) return s; } -void strbuf_set_increment(strbuf_t *s, int increment) -{ - /* Increment > 0: Linear buffer growth rate - * Increment < -1: Exponential buffer growth rate */ - if (increment == 0 || increment == -1) - die("BUG: Invalid string increment"); - - s->increment = increment; -} - static inline void debug_stats(strbuf_t *s) { if (s->debug) { - fprintf(stderr, "strbuf(%lx) reallocs: %d, length: %d, size: %d\n", + fprintf(stderr, 
"strbuf(%lx) reallocs: %d, length: %zd, size: %zd\n", (long)s, s->reallocs, s->length, s->size); } } @@ -113,7 +104,7 @@ void strbuf_free(strbuf_t *s) free(s); } -char *strbuf_free_to_string(strbuf_t *s, int *len) +char *strbuf_free_to_string(strbuf_t *s, size_t *len) { char *buf; @@ -131,57 +122,61 @@ char *strbuf_free_to_string(strbuf_t *s, int *len) return buf; } -static int calculate_new_size(strbuf_t *s, int len) +static size_t calculate_new_size(strbuf_t *s, size_t len) { - int reqsize, newsize; + size_t reqsize, newsize; if (len <= 0) die("BUG: Invalid strbuf length requested"); /* Ensure there is room for optional NULL termination */ reqsize = len + 1; + if (reqsize < len) + die("Overflow, len: %zu", len); /* If the user has requested to shrink the buffer, do it exactly */ if (s->size > reqsize) return reqsize; newsize = s->size; - if (s->increment < 0) { + if (reqsize >= SIZE_MAX / 2) { + newsize = reqsize; + } else { /* Exponential sizing */ while (newsize < reqsize) - newsize *= -s->increment; - } else { - /* Linear sizing */ - newsize = ((newsize + s->increment - 1) / s->increment) * s->increment; + newsize *= 2; } + if (newsize < reqsize) + die("BUG: strbuf length would overflow, len: %zu", len); + return newsize; } /* Ensure strbuf can handle a string length bytes long (ignoring NULL * optional termination). 
*/ -void strbuf_resize(strbuf_t *s, int len) +void strbuf_resize(strbuf_t *s, size_t len) { - int newsize; + size_t newsize; newsize = calculate_new_size(s, len); if (s->debug > 1) { - fprintf(stderr, "strbuf(%lx) resize: %d => %d\n", + fprintf(stderr, "strbuf(%lx) resize: %zd => %zd\n", (long)s, s->size, newsize); } s->size = newsize; s->buf = realloc(s->buf, s->size); if (!s->buf) - die("Out of memory"); + die("Out of memory, len: %zu", len); s->reallocs++; } void strbuf_append_string(strbuf_t *s, const char *str) { - int space, i; + size_t i, space; space = strbuf_empty_length(s); @@ -197,55 +192,6 @@ void strbuf_append_string(strbuf_t *s, const char *str) } } -/* strbuf_append_fmt() should only be used when an upper bound - * is known for the output string. */ -void strbuf_append_fmt(strbuf_t *s, int len, const char *fmt, ...) -{ - va_list arg; - int fmt_len; - - strbuf_ensure_empty_length(s, len); - - va_start(arg, fmt); - fmt_len = vsnprintf(s->buf + s->length, len, fmt, arg); - va_end(arg); - - if (fmt_len < 0) - die("BUG: Unable to convert number"); /* This should never happen.. */ - - s->length += fmt_len; -} - -/* strbuf_append_fmt_retry() can be used when the there is no known - * upper bound for the output string. */ -void strbuf_append_fmt_retry(strbuf_t *s, const char *fmt, ...) -{ - va_list arg; - int fmt_len, try; - int empty_len; - - /* If the first attempt to append fails, resize the buffer appropriately - * and try again */ - for (try = 0; ; try++) { - va_start(arg, fmt); - /* Append the new formatted string */ - /* fmt_len is the length of the string required, excluding the - * trailing NULL */ - empty_len = strbuf_empty_length(s); - /* Add 1 since there is also space to store the terminating NULL. 
*/ - fmt_len = vsnprintf(s->buf + s->length, empty_len + 1, fmt, arg); - va_end(arg); - - if (fmt_len <= empty_len) - break; /* SUCCESS */ - if (try > 0) - die("BUG: length of formatted string changed"); - - strbuf_resize(s, s->length + fmt_len); - } - - s->length += fmt_len; -} /* vi:ai et sw=4 ts=4: */ diff --git a/deps/lua/src/strbuf.h b/deps/lua/src/strbuf.h index d861108c14c..c10f83f0db8 100644 --- a/deps/lua/src/strbuf.h +++ b/deps/lua/src/strbuf.h @@ -27,15 +27,13 @@ /* Size: Total bytes allocated to *buf * Length: String length, excluding optional NULL terminator. - * Increment: Allocation increments when resizing the string buffer. * Dynamic: True if created via strbuf_new() */ typedef struct { char *buf; - int size; - int length; - int increment; + size_t size; + size_t length; int dynamic; int reallocs; int debug; @@ -44,32 +42,26 @@ typedef struct { #ifndef STRBUF_DEFAULT_SIZE #define STRBUF_DEFAULT_SIZE 1023 #endif -#ifndef STRBUF_DEFAULT_INCREMENT -#define STRBUF_DEFAULT_INCREMENT -2 -#endif /* Initialise */ -extern strbuf_t *strbuf_new(int len); -extern void strbuf_init(strbuf_t *s, int len); -extern void strbuf_set_increment(strbuf_t *s, int increment); +extern strbuf_t *strbuf_new(size_t len); +extern void strbuf_init(strbuf_t *s, size_t len); /* Release */ extern void strbuf_free(strbuf_t *s); -extern char *strbuf_free_to_string(strbuf_t *s, int *len); +extern char *strbuf_free_to_string(strbuf_t *s, size_t *len); /* Management */ -extern void strbuf_resize(strbuf_t *s, int len); -static int strbuf_empty_length(strbuf_t *s); -static int strbuf_length(strbuf_t *s); -static char *strbuf_string(strbuf_t *s, int *len); -static void strbuf_ensure_empty_length(strbuf_t *s, int len); +extern void strbuf_resize(strbuf_t *s, size_t len); +static size_t strbuf_empty_length(strbuf_t *s); +static size_t strbuf_length(strbuf_t *s); +static char *strbuf_string(strbuf_t *s, size_t *len); +static void strbuf_ensure_empty_length(strbuf_t *s, size_t len); static 
char *strbuf_empty_ptr(strbuf_t *s); -static void strbuf_extend_length(strbuf_t *s, int len); +static void strbuf_extend_length(strbuf_t *s, size_t len); /* Update */ -extern void strbuf_append_fmt(strbuf_t *s, int len, const char *fmt, ...); -extern void strbuf_append_fmt_retry(strbuf_t *s, const char *format, ...); -static void strbuf_append_mem(strbuf_t *s, const char *c, int len); +static void strbuf_append_mem(strbuf_t *s, const char *c, size_t len); extern void strbuf_append_string(strbuf_t *s, const char *str); static void strbuf_append_char(strbuf_t *s, const char c); static void strbuf_ensure_null(strbuf_t *s); @@ -87,12 +79,12 @@ static inline int strbuf_allocated(strbuf_t *s) /* Return bytes remaining in the string buffer * Ensure there is space for a NULL terminator. */ -static inline int strbuf_empty_length(strbuf_t *s) +static inline size_t strbuf_empty_length(strbuf_t *s) { return s->size - s->length - 1; } -static inline void strbuf_ensure_empty_length(strbuf_t *s, int len) +static inline void strbuf_ensure_empty_length(strbuf_t *s, size_t len) { if (len > strbuf_empty_length(s)) strbuf_resize(s, s->length + len); @@ -103,12 +95,12 @@ static inline char *strbuf_empty_ptr(strbuf_t *s) return s->buf + s->length; } -static inline void strbuf_extend_length(strbuf_t *s, int len) +static inline void strbuf_extend_length(strbuf_t *s, size_t len) { s->length += len; } -static inline int strbuf_length(strbuf_t *s) +static inline size_t strbuf_length(strbuf_t *s) { return s->length; } @@ -124,14 +116,14 @@ static inline void strbuf_append_char_unsafe(strbuf_t *s, const char c) s->buf[s->length++] = c; } -static inline void strbuf_append_mem(strbuf_t *s, const char *c, int len) +static inline void strbuf_append_mem(strbuf_t *s, const char *c, size_t len) { strbuf_ensure_empty_length(s, len); memcpy(s->buf + s->length, c, len); s->length += len; } -static inline void strbuf_append_mem_unsafe(strbuf_t *s, const char *c, int len) +static inline void 
strbuf_append_mem_unsafe(strbuf_t *s, const char *c, size_t len) { memcpy(s->buf + s->length, c, len); s->length += len; @@ -142,7 +134,7 @@ static inline void strbuf_ensure_null(strbuf_t *s) s->buf[s->length] = 0; } -static inline char *strbuf_string(strbuf_t *s, int *len) +static inline char *strbuf_string(strbuf_t *s, size_t *len) { if (len) *len = s->length; diff --git a/redis.conf b/redis.conf index 97f077b0d0a..65e01b0742e 100644 --- a/redis.conf +++ b/redis.conf @@ -51,6 +51,7 @@ # # loadmodule /path/to/my_module.so # loadmodule /path/to/other_module.so +# loadmodule /path/to/args_module.so [arg [arg ...]] ################################## NETWORK ##################################### @@ -1162,7 +1163,8 @@ acllog-max-len 128 # configuration directive. # # The default of 5 produces good enough results. 10 Approximates very closely -# true LRU but costs more CPU. 3 is faster but not very accurate. +# true LRU but costs more CPU. 3 is faster but not very accurate. The maximum +# value that can be set is 64. # # maxmemory-samples 5 @@ -1382,6 +1384,10 @@ disable-thp yes # If the AOF is enabled on startup Redis will load the AOF, that is the file # with the better durability guarantees. # +# Note that changing this value in a config file of an existing database and +# restarting the server can lead to data loss. A conversion needs to be done +# by setting it via CONFIG command on a live server first. +# # Please check https://redis.io/topics/persistence for more information. appendonly no @@ -2070,7 +2076,7 @@ client-output-buffer-limit pubsub 32mb 8mb 60 # amount by default in order to avoid that a protocol desynchronization (for # instance due to a bug in the client) will lead to unbound memory usage in # the query buffer. However you can configure it here if you have very special -# needs, such us huge multi/exec requests or alike. +# needs, such as a command with huge argument, or huge multi/exec requests or alike. 
# # client-query-buffer-limit 1gb @@ -2195,6 +2201,26 @@ rdb-save-incremental-fsync yes # lfu-log-factor 10 # lfu-decay-time 1 + +# The maximum number of new client connections accepted per event-loop cycle. This configuration +# is set independently for TLS connections. +# +# By default, up to 10 new connections will be accepted per event-loop cycle for normal connections +# and up to 1 new connection per event-loop cycle for TLS connections. +# +# Adjusting this to a larger number can slightly improve efficiency for new connections +# at the risk of causing timeouts for regular commands on established connections. It is +# not advised to change this without ensuring that all clients have limited connection +# pools and exponential backoff in the case of command/connection timeouts. +# +# If your application is establishing a large number of new connections per second you should +# also consider tuning the value of tcp-backlog, which allows the kernel to buffer more +# pending connections before dropping or rejecting connections. +# +# max-new-connections-per-cycle 10 +# max-new-tls-connections-per-cycle 1 + + ########################### ACTIVE DEFRAGMENTATION ####################### # # What is active defragmentation?
@@ -2276,16 +2302,16 @@ jemalloc-bg-thread yes # the taskset command: # # Set redis server/io threads to cpu affinity 0,2,4,6: -# server_cpulist 0-7:2 +# server-cpulist 0-7:2 # # Set bio threads to cpu affinity 1,3: -# bio_cpulist 1,3 +# bio-cpulist 1,3 # # Set aof rewrite child process to cpu affinity 8,9,10,11: -# aof_rewrite_cpulist 8-11 +# aof-rewrite-cpulist 8-11 # # Set bgsave child process to cpu affinity 1,10,11 -# bgsave_cpulist 1,10-11 +# bgsave-cpulist 1,10-11 # In some cases redis will emit warnings and even refuse to start if it detects # that the system is in bad state, it is possible to suppress these warnings diff --git a/runtest-moduleapi b/runtest-moduleapi index ff685afb66c..910d581f2fc 100755 --- a/runtest-moduleapi +++ b/runtest-moduleapi @@ -55,4 +55,5 @@ $TCLSH tests/test_helper.tcl \ --single unit/moduleapi/async_rm_call \ --single unit/moduleapi/moduleauth \ --single unit/moduleapi/rdbloadsave \ +--single unit/moduleapi/crash \ "${@}" diff --git a/src/Makefile b/src/Makefile index ecbd2753d9f..cc84d09ad76 100644 --- a/src/Makefile +++ b/src/Makefile @@ -1,6 +1,9 @@ # Redis Makefile -# Copyright (C) 2009 Salvatore Sanfilippo -# This file is released under the BSD license, see the COPYING file +# Copyright (c) 2011-Present, Redis Ltd. +# All rights reserved. +# +# Licensed under your choice of the Redis Source Available License 2.0 +# (RSALv2) or the Server Side Public License v1 (SSPLv1). # # The Makefile composes the final FINAL_CFLAGS and FINAL_LDFLAGS using # what is needed for Redis plus the standard CFLAGS and LDFLAGS passed. @@ -16,14 +19,20 @@ release_hdr := $(shell sh -c './mkreleasehdr.sh') uname_S := $(shell sh -c 'uname -s 2>/dev/null || echo not') uname_M := $(shell sh -c 'uname -m 2>/dev/null || echo not') CLANG := $(findstring clang,$(shell sh -c '$(CC) --version | head -1')) + +# Optimization flags. To override, the OPTIMIZATION variable can be passed, but +# some automatic defaults are added to it. 
To specify optimization flags +# explicitly without any defaults added, pass the OPT variable instead. OPTIMIZATION?=-O3 ifeq ($(OPTIMIZATION),-O3) ifeq (clang,$(CLANG)) - REDIS_CFLAGS+=-flto + OPTIMIZATION+=-flto else - REDIS_CFLAGS+=-flto=auto + OPTIMIZATION+=-flto=auto endif - REDIS_LDFLAGS+=-O3 -flto +endif +ifneq ($(OPTIMIZATION),-O0) + OPTIMIZATION+=-fno-omit-frame-pointer endif DEPENDENCY_TARGETS=hiredis linenoise lua hdr_histogram fpconv NODEPS:=clean distclean @@ -117,7 +126,7 @@ endif -include .make-settings FINAL_CFLAGS=$(STD) $(WARN) $(OPT) $(DEBUG) $(CFLAGS) $(REDIS_CFLAGS) -FINAL_LDFLAGS=$(LDFLAGS) $(REDIS_LDFLAGS) $(DEBUG) +FINAL_LDFLAGS=$(LDFLAGS) $(OPT) $(REDIS_LDFLAGS) $(DEBUG) FINAL_LIBS=-lm DEBUG=-g -ggdb @@ -345,7 +354,7 @@ endif REDIS_SERVER_NAME=redis-server$(PROG_SUFFIX) REDIS_SENTINEL_NAME=redis-sentinel$(PROG_SUFFIX) -REDIS_SERVER_OBJ=adlist.o quicklist.o ae.o anet.o dict.o server.o sds.o zmalloc.o lzf_c.o lzf_d.o pqsort.o zipmap.o sha1.o ziplist.o release.o networking.o util.o object.o db.o replication.o rdb.o t_string.o t_list.o t_set.o t_zset.o t_hash.o config.o aof.o pubsub.o multi.o debug.o sort.o intset.o syncio.o cluster.o crc16.o endianconv.o slowlog.o eval.o bio.o rio.o rand.o memtest.o syscheck.o crcspeed.o crc64.o bitops.o sentinel.o notify.o setproctitle.o blocked.o hyperloglog.o latency.o sparkline.o redis-check-rdb.o redis-check-aof.o geo.o lazyfree.o module.o evict.o expire.o geohash.o geohash_helper.o childinfo.o defrag.o siphash.o rax.o t_stream.o listpack.o localtime.o lolwut.o lolwut5.o lolwut6.o acl.o tracking.o socket.o tls.o sha256.o timeout.o setcpuaffinity.o monotonic.o mt19937-64.o resp_parser.o call_reply.o script_lua.o script.o functions.o function_lua.o commands.o strl.o connection.o unix.o logreqres.o +REDIS_SERVER_OBJ=threads_mngr.o adlist.o quicklist.o ae.o anet.o dict.o ebuckets.o mstr.o kvstore.o server.o sds.o zmalloc.o lzf_c.o lzf_d.o pqsort.o zipmap.o sha1.o ziplist.o release.o networking.o util.o 
object.o db.o replication.o rdb.o t_string.o t_list.o t_set.o t_zset.o t_hash.o config.o aof.o pubsub.o multi.o debug.o sort.o intset.o syncio.o cluster.o cluster_legacy.o crc16.o endianconv.o slowlog.o eval.o bio.o rio.o rand.o memtest.o syscheck.o crcspeed.o crc64.o bitops.o sentinel.o notify.o setproctitle.o blocked.o hyperloglog.o latency.o sparkline.o redis-check-rdb.o redis-check-aof.o geo.o lazyfree.o module.o evict.o expire.o geohash.o geohash_helper.o childinfo.o defrag.o siphash.o rax.o t_stream.o listpack.o localtime.o lolwut.o lolwut5.o lolwut6.o acl.o tracking.o socket.o tls.o sha256.o timeout.o setcpuaffinity.o monotonic.o mt19937-64.o resp_parser.o call_reply.o script_lua.o script.o functions.o function_lua.o commands.o strl.o connection.o unix.o logreqres.o REDIS_CLI_NAME=redis-cli$(PROG_SUFFIX) REDIS_CLI_OBJ=anet.o adlist.o dict.o redis-cli.o zmalloc.o release.o ae.o redisassert.o crcspeed.o crc64.o siphash.o crc16.o monotonic.o cli_common.o mt19937-64.o strl.o cli_commands.o REDIS_BENCHMARK_NAME=redis-benchmark$(PROG_SUFFIX) @@ -420,7 +429,7 @@ $(TLS_MODULE_NAME): $(REDIS_SERVER_NAME) # redis-cli $(REDIS_CLI_NAME): $(REDIS_CLI_OBJ) - $(REDIS_LD) -o $@ $^ ../deps/hiredis/libhiredis.a ../deps/linenoise/linenoise.o $(FINAL_LIBS) $(TLS_CLIENT_LIBS) + $(REDIS_LD) -o $@ $^ ../deps/hiredis/libhiredis.a ../deps/linenoise/linenoise.o ../deps/hdr_histogram/libhdrhistogram.a $(FINAL_LIBS) $(TLS_CLIENT_LIBS) # redis-benchmark $(REDIS_BENCHMARK_NAME): $(REDIS_BENCHMARK_OBJ) @@ -435,11 +444,16 @@ DEP = $(REDIS_SERVER_OBJ:%.o=%.d) $(REDIS_CLI_OBJ:%.o=%.d) $(REDIS_BENCHMARK_OBJ %.o: %.c .make-prerequisites $(REDIS_CC) -MMD -o $@ -c $< -# The file commands.def is checked in and doesn't normally need to be rebuilt. It -# is built only if python is available and its prereqs are modified. +# The following files are checked in and don't normally need to be rebuilt. They +# are built only if python is available and their prereqs are modified. 
ifneq (,$(PYTHON)) $(COMMANDS_DEF_FILENAME).def: commands/*.json ../utils/generate-command-code.py $(QUIET_GEN)$(PYTHON) ../utils/generate-command-code.py $(GEN_COMMANDS_FLAGS) + +fmtargs.h: ../utils/generate-fmtargs.py + $(QUIET_GEN)sed '/Everything below this line/,$$d' $@ > $@.tmp + $(QUIET_GEN)$(PYTHON) ../utils/generate-fmtargs.py >> $@.tmp + $(QUIET_GEN)mv $@.tmp $@ endif commands.c: $(COMMANDS_DEF_FILENAME).def diff --git a/src/acl.c b/src/acl.c index aa42c58dcff..c2cca0f3fdd 100644 --- a/src/acl.c +++ b/src/acl.c @@ -1,30 +1,9 @@ /* - * Copyright (c) 2018, Salvatore Sanfilippo + * Copyright (c) 2018-Present, Redis Ltd. * All rights reserved. * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Redis nor the names of its contributors may be used - * to endorse or promote products derived from this software without - * specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. + * Licensed under your choice of the Redis Source Available License 2.0 + * (RSALv2) or the Server Side Public License v1 (SSPLv1). */ #include "server.h" @@ -59,10 +38,12 @@ static rax *commandId = NULL; /* Command name to id mapping */ static unsigned long nextid = 0; /* Next command id that has not been assigned */ +#define ACL_MAX_CATEGORIES 64 /* Maximum number of command categories */ + struct ACLCategoryItem { - const char *name; + char *name; uint64_t flag; -} ACLCommandCategories[] = { /* See redis.conf for details on each category. */ +} ACLDefaultCommandCategories[] = { /* See redis.conf for details on each category. */ {"keyspace", ACL_CATEGORY_KEYSPACE}, {"read", ACL_CATEGORY_READ}, {"write", ACL_CATEGORY_WRITE}, @@ -87,6 +68,54 @@ struct ACLCategoryItem { {NULL,0} /* Terminator. */ }; +static struct ACLCategoryItem *ACLCommandCategories = NULL; +static size_t nextCommandCategory = 0; /* Index of the next command category to be added */ + +/* Implements the ability to add to the list of ACL categories at runtime. Since each ACL category + * also requires a bit in the acl_categories flag, there is a limit to the number that can be added. + * The new ACL categories occupy the remaining bits of acl_categories flag, other than the bits + * occupied by the default ACL command categories. + * + * The optional `flag` argument allows the assignment of the `acl_categories` flag bit to the ACL category. 
+ * When adding a new category, except for the default ACL command categories, this argument should be `0` + * to allow the function to assign the next available `acl_categories` flag bit to the new ACL category. + * + * returns 1 -> Added, 0 -> Failed (out of space) + * + * This function is present here to gain access to the ACLCommandCategories array and add a new ACL category. + */ +int ACLAddCommandCategory(const char *name, uint64_t flag) { + if (nextCommandCategory >= ACL_MAX_CATEGORIES) return 0; + ACLCommandCategories[nextCommandCategory].name = zstrdup(name); + ACLCommandCategories[nextCommandCategory].flag = flag != 0 ? flag : (1ULL<<nextCommandCategory); + nextCommandCategory++; + return 1; +} u->name = sdsnewlen(name,namelen); u->flags = USER_FLAG_DISABLED; @@ -456,15 +485,7 @@ void ACLFreeUserAndKillClients(user *u) { * this may result in some security hole: it's much * more defensive to set the default user and put * it in non authenticated mode. */ - c->user = DefaultUser; - c->authenticated = 0; - /* We will write replies to this client later, so we can't - * close it directly even if async. */ - if (c == server.current_client) { - c->flags |= CLIENT_CLOSE_AFTER_COMMAND; - } else { - freeClientAsync(c); - } + deauthenticateAndCloseClient(c); } } ACLFreeUser(u); @@ -489,12 +510,6 @@ void ACLCopyUser(user *dst, user *src) { } } -/* Free all the users registered in the radix tree 'users' and free the - * radix tree itself. */ -void ACLFreeUsersSet(rax *users) { - raxFreeWithCallback(users,(void(*)(void*))ACLFreeUserAndKillClients); -} - /* Given a command ID, this function set by reference 'word' and 'bit' * so that user->allowed_commands[word] will address the right word * where the corresponding bit for the provided ID is stored, and @@ -563,7 +578,7 @@ void ACLSelectorRemoveCommandRule(aclSelector *selector, sds new_rule) { * as well if the command is removed. */ char *rule_end = strchr(existing_rule, ' '); if (!rule_end) { - /* This is the last rule, so it it to the end of the string.
*/ + /* This is the last rule, so move it to the end of the string. */ rule_end = existing_rule + strlen(existing_rule); /* This approach can leave a trailing space if the last rule is removed, @@ -580,6 +595,8 @@ void ACLSelectorRemoveCommandRule(aclSelector *selector, sds new_rule) { /* Copy the remaining rules starting at the next rule to replace the rule to be * deleted, including the terminating NULL character. */ memmove(copy_position, copy_end, strlen(copy_end) + 1); + existing_rule = copy_position; + continue; } } existing_rule = copy_end; @@ -911,7 +928,7 @@ void ACLResetFirstArgs(aclSelector *selector) { selector->allowed_firstargs = NULL; } -/* Add a first-arh to the list of subcommands for the user 'u' and +/* Add a first-arg to the list of subcommands for the user 'u' and * the command id specified. */ void ACLAddAllowedFirstArg(aclSelector *selector, unsigned long id, const char *sub) { /* If this is the first first-arg to be configured for @@ -1396,6 +1413,7 @@ user *ACLCreateDefaultUser(void) { void ACLInit(void) { Users = raxNew(); UsersToLoad = listCreate(); + ACLInitCommandCategories(); listSetMatchMethod(UsersToLoad, ACLListMatchLoadedUser); ACLLog = listCreate(); DefaultUser = ACLCreateDefaultUser(); @@ -1405,7 +1423,7 @@ void ACLInit(void) { * otherwise C_ERR is returned and errno is set to: * * EINVAL: if the username-password do not match. - * ENONENT: if the specified user does not exist at all. + * ENOENT: if the specified user does not exist at all. 
*/ int ACLCheckUserCredentials(robj *username, robj *password) { user *u = ACLGetUserByName(username->ptr,sdslen(username->ptr)); @@ -1500,8 +1518,8 @@ unsigned long ACLGetCommandID(sds cmdname) { sds lowername = sdsdup(cmdname); sdstolower(lowername); if (commandId == NULL) commandId = raxNew(); - void *id = raxFind(commandId,(unsigned char*)lowername,sdslen(lowername)); - if (id != raxNotFound) { + void *id; + if (raxFind(commandId,(unsigned char*)lowername,sdslen(lowername),&id)) { sdsfree(lowername); return (unsigned long)id; } @@ -1532,8 +1550,8 @@ void ACLClearCommandID(void) { /* Return an username by its name, or NULL if the user does not exist. */ user *ACLGetUserByName(const char *name, size_t namelen) { - void *myuser = raxFind(Users,(unsigned char*)name,namelen); - if (myuser == raxNotFound) return NULL; + void *myuser = NULL; + raxFind(Users,(unsigned char*)name,namelen,&myuser); return myuser; } @@ -1856,23 +1874,20 @@ int ACLCheckAllPerm(client *c, int *idxptr) { return ACLCheckAllUserCommandPerm(c->user, c->cmd, c->argv, c->argc, idxptr); } -/* Check if the user's existing pub/sub clients violate the ACL pub/sub - * permissions specified via the upcoming argument, and kill them if so. */ -void ACLKillPubsubClientsIfNeeded(user *new, user *original) { +/* If 'new' can access all channels 'original' could then return NULL; + Otherwise return a list of channels that the new user can access */ +list *getUpcomingChannelList(user *new, user *original) { listIter li, lpi; listNode *ln, *lpn; - robj *o; - int kill = 0; - - /* First optimization is we check if any selector has all channel - * permissions. */ + + /* Optimization: we check if any selector has all channel permissions. 
*/ listRewind(new->selectors,&li); while((ln = listNext(&li))) { aclSelector *s = (aclSelector *) listNodeValue(ln); - if (s->flags & SELECTOR_FLAG_ALLCHANNELS) return; + if (s->flags & SELECTOR_FLAG_ALLCHANNELS) return NULL; } - /* Second optimization is to check if the new list of channels + /* Next, check if the new list of channels * is a strict superset of the original. This is done by * created an "upcoming" list of all channels that are in * the new user and checking each of the existing channels @@ -1910,58 +1925,87 @@ void ACLKillPubsubClientsIfNeeded(user *new, user *original) { if (match) { /* All channels were matched, no need to kill clients. */ listRelease(upcoming); - return; + return NULL; } - - /* Permissions have changed, so we need to iterate through all - * the clients and disconnect those that are no longer valid. - * Scan all connected clients to find the user's pub/subs. */ - listRewind(server.clients,&li); - while ((ln = listNext(&li)) != NULL) { - client *c = listNodeValue(ln); - kill = 0; - if (c->user == original && getClientType(c) == CLIENT_TYPE_PUBSUB) { - /* Check for pattern violations. */ - dictIterator *di = dictGetIterator(c->pubsub_patterns); - dictEntry *de; + return upcoming; +} + +/* Check if the client should be killed because it is subscribed to channels that were + * permitted in the past, are not in the `upcoming` channel list. */ +int ACLShouldKillPubsubClient(client *c, list *upcoming) { + robj *o; + int kill = 0; + + if (getClientType(c) == CLIENT_TYPE_PUBSUB) { + /* Check for pattern violations. */ + dictIterator *di = dictGetIterator(c->pubsub_patterns); + dictEntry *de; + while (!kill && ((de = dictNext(di)) != NULL)) { + o = dictGetKey(de); + int res = ACLCheckChannelAgainstList(upcoming, o->ptr, sdslen(o->ptr), 1); + kill = (res == ACL_DENIED_CHANNEL); + } + dictReleaseIterator(di); + + /* Check for channel violations. */ + if (!kill) { + /* Check for global channels violation. 
*/ + di = dictGetIterator(c->pubsub_channels); + while (!kill && ((de = dictNext(di)) != NULL)) { o = dictGetKey(de); - int res = ACLCheckChannelAgainstList(upcoming, o->ptr, sdslen(o->ptr), 1); + int res = ACLCheckChannelAgainstList(upcoming, o->ptr, sdslen(o->ptr), 0); kill = (res == ACL_DENIED_CHANNEL); } dictReleaseIterator(di); - - /* Check for channel violations. */ - if (!kill) { - /* Check for global channels violation. */ - di = dictGetIterator(c->pubsub_channels); - while (!kill && ((de = dictNext(di)) != NULL)) { - o = dictGetKey(de); - int res = ACLCheckChannelAgainstList(upcoming, o->ptr, sdslen(o->ptr), 0); - kill = (res == ACL_DENIED_CHANNEL); - } - dictReleaseIterator(di); - } - - if (!kill) { - /* Check for shard channels violation. */ - di = dictGetIterator(c->pubsubshard_channels); - while (!kill && ((de = dictNext(di)) != NULL)) { - o = dictGetKey(de); - int res = ACLCheckChannelAgainstList(upcoming, o->ptr, sdslen(o->ptr), 0); - kill = (res == ACL_DENIED_CHANNEL); - } - dictReleaseIterator(di); + } + if (!kill) { + /* Check for shard channels violation. */ + di = dictGetIterator(c->pubsubshard_channels); + while (!kill && ((de = dictNext(di)) != NULL)) { + o = dictGetKey(de); + int res = ACLCheckChannelAgainstList(upcoming, o->ptr, sdslen(o->ptr), 0); + kill = (res == ACL_DENIED_CHANNEL); } + dictReleaseIterator(di); + } - /* Kill it. */ - if (kill) { - freeClient(c); - } + if (kill) { + return 1; } } - listRelease(upcoming); + return 0; +} + +/* Check if the user's existing pub/sub clients violate the ACL pub/sub + * permissions specified via the upcoming argument, and kill them if so. */ +void ACLKillPubsubClientsIfNeeded(user *new, user *original) { + /* Do nothing if there are no subscribers. */ + if (pubsubTotalSubscriptions() == 0) + return; + + list *channels = getUpcomingChannelList(new, original); + /* If the new user's pubsub permissions are a strict superset of the original, return early. 
*/ + if (!channels) + return; + + listIter li; + listNode *ln; + + /* Permissions have changed, so we need to iterate through all + * the clients and disconnect those that are no longer valid. + * Scan all connected clients to find the user's pub/subs. */ + listRewind(server.clients,&li); + while ((ln = listNext(&li)) != NULL) { + client *c = listNodeValue(ln); + if (c->user != original) + continue; + if (ACLShouldKillPubsubClient(c, channels)) + deauthenticateAndCloseClient(c); + } + + listRelease(channels); } /* ============================================================================= @@ -1990,7 +2034,8 @@ sds *ACLMergeSelectorArguments(sds *argv, int argc, int *merged_argc, int *inval for (int j = 0; j < argc; j++) { char *op = argv[j]; - if (op[0] == '(' && op[sdslen(op) - 1] != ')') { + if (open_bracket_start == -1 && + (op[0] == '(' && op[sdslen(op) - 1] != ')')) { selector = sdsdup(argv[j]); open_bracket_start = j; continue; @@ -2367,11 +2412,46 @@ sds ACLLoadFromFile(const char *filename) { ACLFreeUser(new_default); raxInsert(Users,(unsigned char*)"default",7,DefaultUser,NULL); raxRemove(old_users,(unsigned char*)"default",7,NULL); - ACLFreeUsersSet(old_users); + + /* If there are some subscribers, we need to check if we need to drop some clients. 
*/ + rax *user_channels = NULL; + if (pubsubTotalSubscriptions() > 0) { + user_channels = raxNew(); + } + + listIter li; + listNode *ln; + + listRewind(server.clients,&li); + while ((ln = listNext(&li)) != NULL) { + client *c = listNodeValue(ln); + /* a MASTER client can do everything (and user = NULL) so we can skip it */ + if (c->flags & CLIENT_MASTER) + continue; + user *original = c->user; + list *channels = NULL; + user *new = ACLGetUserByName(c->user->name, sdslen(c->user->name)); + if (new && user_channels) { + if (!raxFind(user_channels, (unsigned char*)(new->name), sdslen(new->name), (void**)&channels)) { + channels = getUpcomingChannelList(new, original); + raxInsert(user_channels, (unsigned char*)(new->name), sdslen(new->name), channels, NULL); + } + } + /* When the new channel list is NULL, it means the new user's channel list is a superset of the old user's list. */ + if (!new || (channels && ACLShouldKillPubsubClient(c, channels))) { + deauthenticateAndCloseClient(c); + continue; + } + c->user = new; + } + + if (user_channels) + raxFreeWithCallback(user_channels, (void(*)(void*))listRelease); + raxFreeWithCallback(old_users,(void(*)(void*))ACLFreeUser); sdsfree(errors); return NULL; } else { - ACLFreeUsersSet(Users); + raxFreeWithCallback(Users,(void(*)(void*))ACLFreeUser); Users = old_users; errors = sdscat(errors,"WARNING: ACL errors detected, no change to the previously active ACL rules was performed"); return errors; @@ -2549,6 +2629,15 @@ void ACLUpdateInfoMetrics(int reason){ } } +static void trimACLLogEntriesToMaxLen(void) { + while(listLength(ACLLog) > server.acllog_max_len) { + listNode *ln = listLast(ACLLog); + ACLLogEntry *le = listNodeValue(ln); + ACLFreeLogEntry(le); + listDelNode(ACLLog,ln); + } +} + /* Adds a new entry in the ACL log, making sure to delete the old entry * if we reach the maximum length allowed for the log. 
This function attempts * to find similar entries in the current log in order to bump the counter of @@ -2568,6 +2657,11 @@ void addACLLogEntry(client *c, int reason, int context, int argpos, sds username /* Update ACL info metrics */ ACLUpdateInfoMetrics(reason); + if (server.acllog_max_len == 0) { + trimACLLogEntriesToMaxLen(); + return; + } + /* Create a new entry. */ struct ACLLogEntry *le = zmalloc(sizeof(*le)); le->count = 1; @@ -2630,12 +2724,7 @@ void addACLLogEntry(client *c, int reason, int context, int argpos, sds username * to its maximum size. */ ACLLogEntryCount++; /* Incrementing the entry_id count to make each record in the log unique. */ listAddNodeHead(ACLLog, le); - while(listLength(ACLLog) > server.acllog_max_len) { - listNode *ln = listLast(ACLLog); - ACLLogEntry *le = listNodeValue(ln); - ACLFreeLogEntry(le); - listDelNode(ACLLog,ln); - } + trimACLLogEntriesToMaxLen(); } } @@ -2759,8 +2848,7 @@ void aclCommand(client *c) { sds username = c->argv[2]->ptr; /* Check username validity. */ if (ACLStringHasSpaces(username,sdslen(username))) { - addReplyErrorFormat(c, - "Usernames can't contain spaces or null characters"); + addReplyError(c, "Usernames can't contain spaces or null characters"); return; } @@ -2778,6 +2866,10 @@ void aclCommand(client *c) { } return; } else if (!strcasecmp(sub,"deluser") && c->argc >= 3) { + /* Initially redact all the arguments to not leak any information + * about the users. */ + for (int j = 2; j < c->argc; j++) redactClientCommandArgument(c, j); + int deleted = 0; for (int j = 2; j < c->argc; j++) { sds username = c->argv[j]->ptr; @@ -2800,6 +2892,9 @@ void aclCommand(client *c) { } addReplyLongLong(c,deleted); } else if (!strcasecmp(sub,"getuser") && c->argc == 3) { + /* Redact the username to not leak any information about the user. 
*/ + redactClientCommandArgument(c, 2); + user *u = ACLGetUserByName(c->argv[2]->ptr,sdslen(c->argv[2]->ptr)); if (u == NULL) { addReplyNull(c); diff --git a/src/adlist.c b/src/adlist.c index f031c46e87d..0e8f6d5c14a 100644 --- a/src/adlist.c +++ b/src/adlist.c @@ -1,31 +1,10 @@ /* adlist.c - A generic doubly linked list implementation * - * Copyright (c) 2006-2010, Salvatore Sanfilippo + * Copyright (c) 2006-Present, Redis Ltd. * All rights reserved. * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Redis nor the names of its contributors may be used - * to endorse or promote products derived from this software without - * specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. 
+ * Licensed under your choice of the Redis Source Available License 2.0 + * (RSALv2) or the Server Side Public License v1 (SSPLv1). */ @@ -76,6 +55,8 @@ void listEmpty(list *list) * This function can't fail. */ void listRelease(list *list) { + if (!list) + return; listEmpty(list); zfree(list); } diff --git a/src/adlist.h b/src/adlist.h index 7c5443769b3..b91fe5070ef 100644 --- a/src/adlist.h +++ b/src/adlist.h @@ -1,31 +1,10 @@ /* adlist.h - A generic doubly linked list implementation * - * Copyright (c) 2006-2012, Salvatore Sanfilippo + * Copyright (c) 2006-Present, Redis Ltd. * All rights reserved. * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Redis nor the names of its contributors may be used - * to endorse or promote products derived from this software without - * specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. + * Licensed under your choice of the Redis Source Available License 2.0 + * (RSALv2) or the Server Side Public License v1 (SSPLv1). */ #ifndef __ADLIST_H__ diff --git a/src/ae.c b/src/ae.c index 1b6422b2db8..3d3569865ae 100644 --- a/src/ae.c +++ b/src/ae.c @@ -2,32 +2,11 @@ * for the Jim's event-loop (Jim is a Tcl interpreter) but later translated * it in form of a library for easy reuse. * - * Copyright (c) 2006-2010, Salvatore Sanfilippo + * Copyright (c) 2006-Present, Redis Ltd. * All rights reserved. * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Redis nor the names of its contributors may be used - * to endorse or promote products derived from this software without - * specific prior written permission. 
- * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. + * Licensed under your choice of the Redis Source Available License 2.0 + * (RSALv2) or the Server Side Public License v1 (SSPLv1). */ #include "ae.h" @@ -149,6 +128,8 @@ void aeDeleteEventLoop(aeEventLoop *eventLoop) { aeTimeEvent *next_te, *te = eventLoop->timeEventHead; while (te) { next_te = te->next; + if (te->finalizerProc) + te->finalizerProc(eventLoop, te->clientData); zfree(te); te = next_te; } @@ -333,7 +314,7 @@ static int processTimeEvents(aeEventLoop *eventLoop) { processed++; now = getMonotonicUs(); if (retval != AE_NOMORE) { - te->when = now + retval * 1000; + te->when = now + (monotime)retval * 1000; } else { te->id = AE_DELETED_EVENT_ID; } @@ -343,8 +324,8 @@ static int processTimeEvents(aeEventLoop *eventLoop) { return processed; } -/* Process every pending time event, then every pending file event - * (that may be registered by time event callbacks just processed). +/* Process every pending file event, then every pending time event + * (that may be registered by file event callbacks just processed). * Without special flags the function sleeps until some file event * fires, or when the next time event occurs (if any). 
* diff --git a/src/ae.h b/src/ae.h index 70ce8a2d543..5f1e17f7dc7 100644 --- a/src/ae.h +++ b/src/ae.h @@ -2,32 +2,11 @@ * for the Jim's event-loop (Jim is a Tcl interpreter) but later translated * it in form of a library for easy reuse. * - * Copyright (c) 2006-2012, Salvatore Sanfilippo + * Copyright (c) 2006-Present, Redis Ltd. * All rights reserved. * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Redis nor the names of its contributors may be used - * to endorse or promote products derived from this software without - * specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. + * Licensed under your choice of the Redis Source Available License 2.0 + * (RSALv2) or the Server Side Public License v1 (SSPLv1). 
*/ #ifndef __AE_H__ diff --git a/src/ae_epoll.c b/src/ae_epoll.c index 493ffcad2eb..d03d694feaf 100644 --- a/src/ae_epoll.c +++ b/src/ae_epoll.c @@ -1,31 +1,10 @@ /* Linux epoll(2) based ae.c module * - * Copyright (c) 2009-2012, Salvatore Sanfilippo + * Copyright (c) 2009-Present, Redis Ltd. * All rights reserved. * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Redis nor the names of its contributors may be used - * to endorse or promote products derived from this software without - * specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. + * Licensed under your choice of the Redis Source Available License 2.0 + * (RSALv2) or the Server Side Public License v1 (SSPLv1). 
*/ diff --git a/src/ae_select.c b/src/ae_select.c index f8ef959662c..63d0dfb82a9 100644 --- a/src/ae_select.c +++ b/src/ae_select.c @@ -1,31 +1,10 @@ /* Select()-based ae.c module. * - * Copyright (c) 2009-2012, Salvatore Sanfilippo + * Copyright (c) 2009-Present, Redis Ltd. * All rights reserved. * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Redis nor the names of its contributors may be used - * to endorse or promote products derived from this software without - * specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. + * Licensed under your choice of the Redis Source Available License 2.0 + * (RSALv2) or the Server Side Public License v1 (SSPLv1). 
*/ diff --git a/src/anet.c b/src/anet.c index 790ea7e0aca..705b9e5ce7f 100644 --- a/src/anet.c +++ b/src/anet.c @@ -1,31 +1,10 @@ /* anet.c -- Basic TCP socket stuff made a bit less boring * - * Copyright (c) 2006-2012, Salvatore Sanfilippo + * Copyright (c) 2006-Present, Redis Ltd. * All rights reserved. * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Redis nor the names of its contributors may be used - * to endorse or promote products derived from this software without - * specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. + * Licensed under your choice of the Redis Source Available License 2.0 + * (RSALv2) or the Server Side Public License v1 (SSPLv1). 
*/ #include "fmacros.h" @@ -82,7 +61,7 @@ int anetSetBlock(char *err, int fd, int non_block) { return ANET_ERR; } - /* Check if this flag has been set or unset, if so, + /* Check if this flag has been set or unset, if so, * then there is no need to call fcntl to set/unset it again. */ if (!!(flags & O_NONBLOCK) == !!non_block) return ANET_OK; @@ -107,8 +86,8 @@ int anetBlock(char *err, int fd) { return anetSetBlock(err,fd,0); } -/* Enable the FD_CLOEXEC on the given fd to avoid fd leaks. - * This function should be invoked for fd's on specific places +/* Enable the FD_CLOEXEC on the given fd to avoid fd leaks. + * This function should be invoked for fd's on specific places * where fork + execve system calls are called. */ int anetCloexec(int fd) { int r; @@ -130,57 +109,145 @@ int anetCloexec(int fd) { return r; } -/* Set TCP keep alive option to detect dead peers. The interval option - * is only used for Linux as we are using Linux-specific APIs to set - * the probe send time, interval, and count. */ +/* Enable TCP keep-alive mechanism to detect dead peers, + * TCP_KEEPIDLE, TCP_KEEPINTVL and TCP_KEEPCNT will be set accordingly. */ int anetKeepAlive(char *err, int fd, int interval) { - int val = 1; - - if (setsockopt(fd, SOL_SOCKET, SO_KEEPALIVE, &val, sizeof(val)) == -1) + int enabled = 1; + if (setsockopt(fd, SOL_SOCKET, SO_KEEPALIVE, &enabled, sizeof(enabled))) { anetSetError(err, "setsockopt SO_KEEPALIVE: %s", strerror(errno)); return ANET_ERR; } -#ifdef __linux__ + int idle; + int intvl; + int cnt; + + /* There are platforms that are expected to support the full mechanism of TCP keep-alive, + * we want the compiler to emit warnings of unused variables if the preprocessor directives + * somehow fail, and other than those platforms, just omit these warnings if they happen. 
+ */ +#if !(defined(_AIX) || defined(__APPLE__) || defined(__DragonFly__) || \ + defined(__FreeBSD__) || defined(__illumos__) || defined(__linux__) || \ + defined(__NetBSD__) || defined(__sun)) + UNUSED(interval); + UNUSED(idle); + UNUSED(intvl); + UNUSED(cnt); +#endif + +#ifdef __sun + /* The implementation of TCP keep-alive on Solaris/SmartOS is a bit unusual + * compared to other Unix-like systems. + * Thus, we need to specialize it on Solaris. + * + * There are two keep-alive mechanisms on Solaris: + * - By default, the first keep-alive probe is sent out after a TCP connection is idle for two hours. + * If the peer does not respond to the probe within eight minutes, the TCP connection is aborted. + * You can alter the interval for sending out the first probe using the socket option TCP_KEEPALIVE_THRESHOLD + * in milliseconds or TCP_KEEPIDLE in seconds. + * The system default is controlled by the TCP ndd parameter tcp_keepalive_interval. The minimum value is ten seconds. + * The maximum is ten days, while the default is two hours. If you receive no response to the probe, + * you can use the TCP_KEEPALIVE_ABORT_THRESHOLD socket option to change the time threshold for aborting a TCP connection. + * The option value is an unsigned integer in milliseconds. The value zero indicates that TCP should never time out and + * abort the connection when probing. The system default is controlled by the TCP ndd parameter tcp_keepalive_abort_interval. + * The default is eight minutes. + * + * - The second implementation is activated if socket option TCP_KEEPINTVL and/or TCP_KEEPCNT are set. + * The time between each consequent probes is set by TCP_KEEPINTVL in seconds. + * The minimum value is ten seconds. The maximum is ten days, while the default is two hours. + * The TCP connection will be aborted after certain amount of probes, which is set by TCP_KEEPCNT, without receiving response. 
+ */ + + idle = interval; + if (idle < 10) idle = 10; // kernel expects at least 10 seconds + if (idle > 10*24*60*60) idle = 10*24*60*60; // kernel expects at most 10 days + + /* `TCP_KEEPIDLE`, `TCP_KEEPINTVL`, and `TCP_KEEPCNT` were not available on Solaris + * until version 11.4, but let's take a chance here. */ +#if defined(TCP_KEEPIDLE) && defined(TCP_KEEPINTVL) && defined(TCP_KEEPCNT) + if (setsockopt(fd, IPPROTO_TCP, TCP_KEEPIDLE, &idle, sizeof(idle))) { + anetSetError(err, "setsockopt TCP_KEEPIDLE: %s\n", strerror(errno)); + return ANET_ERR; + } + + intvl = idle/3; + if (setsockopt(fd, IPPROTO_TCP, TCP_KEEPINTVL, &intvl, sizeof(intvl))) { + anetSetError(err, "setsockopt TCP_KEEPINTVL: %s\n", strerror(errno)); + return ANET_ERR; + } + + cnt = 3; + if (setsockopt(fd, IPPROTO_TCP, TCP_KEEPCNT, &cnt, sizeof(cnt))) { + anetSetError(err, "setsockopt TCP_KEEPCNT: %s\n", strerror(errno)); + return ANET_ERR; + } +#else + /* Fall back to the first implementation of the TCP keep-alive mechanism for older Solaris, + * simulate the TCP keep-alive mechanism on other platforms via `TCP_KEEPALIVE_THRESHOLD` + `TCP_KEEPALIVE_ABORT_THRESHOLD`. + */ + idle *= 1000; // kernel expects milliseconds + if (setsockopt(fd, IPPROTO_TCP, TCP_KEEPALIVE_THRESHOLD, &idle, sizeof(idle))) { + anetSetError(err, "setsockopt TCP_KEEPALIVE_THRESHOLD: %s\n", strerror(errno)); + return ANET_ERR; + } + + /* Note that the consequent probes will not be sent at equal intervals on Solaris, + * but will be sent using the exponential backoff algorithm. */ + intvl = idle/3; + cnt = 3; + int time_to_abort = intvl * cnt; + if (setsockopt(fd, IPPROTO_TCP, TCP_KEEPALIVE_ABORT_THRESHOLD, &time_to_abort, sizeof(time_to_abort))) { + anetSetError(err, "setsockopt TCP_KEEPALIVE_ABORT_THRESHOLD: %s\n", strerror(errno)); + return ANET_ERR; + } +#endif + + return ANET_OK; + +#endif + +#ifdef TCP_KEEPIDLE /* Default settings are more or less garbage, with the keepalive time - * set to 7200 by default on Linux. 
Modify settings to make the feature - * actually useful. */ + * set to 7200 by default on Linux and other Unix-like systems. + * Modify settings to make the feature actually useful. */ /* Send first probe after interval. */ - val = interval; - if (setsockopt(fd, IPPROTO_TCP, TCP_KEEPIDLE, &val, sizeof(val)) < 0) { + idle = interval; + if (setsockopt(fd, IPPROTO_TCP, TCP_KEEPIDLE, &idle, sizeof(idle))) { anetSetError(err, "setsockopt TCP_KEEPIDLE: %s\n", strerror(errno)); return ANET_ERR; } +#elif defined(TCP_KEEPALIVE) + /* Darwin/macOS uses TCP_KEEPALIVE in place of TCP_KEEPIDLE. */ + idle = interval; + if (setsockopt(fd, IPPROTO_TCP, TCP_KEEPALIVE, &idle, sizeof(idle))) { + anetSetError(err, "setsockopt TCP_KEEPALIVE: %s\n", strerror(errno)); + return ANET_ERR; + } +#endif +#ifdef TCP_KEEPINTVL /* Send next probes after the specified interval. Note that we set the * delay as interval / 3, as we send three probes before detecting * an error (see the next setsockopt call). */ - val = interval/3; - if (val == 0) val = 1; - if (setsockopt(fd, IPPROTO_TCP, TCP_KEEPINTVL, &val, sizeof(val)) < 0) { + intvl = interval/3; + if (intvl == 0) intvl = 1; + if (setsockopt(fd, IPPROTO_TCP, TCP_KEEPINTVL, &intvl, sizeof(intvl))) { anetSetError(err, "setsockopt TCP_KEEPINTVL: %s\n", strerror(errno)); return ANET_ERR; } +#endif +#ifdef TCP_KEEPCNT /* Consider the socket in error state after three we send three ACK * probes without getting a reply. 
*/ - val = 3; - if (setsockopt(fd, IPPROTO_TCP, TCP_KEEPCNT, &val, sizeof(val)) < 0) { + cnt = 3; + if (setsockopt(fd, IPPROTO_TCP, TCP_KEEPCNT, &cnt, sizeof(cnt))) { anetSetError(err, "setsockopt TCP_KEEPCNT: %s\n", strerror(errno)); return ANET_ERR; } -#elif defined(__APPLE__) - /* Set idle time with interval */ - val = interval; - if (setsockopt(fd, IPPROTO_TCP, TCP_KEEPALIVE, &val, sizeof(val)) < 0) { - anetSetError(err, "setsockopt TCP_KEEPALIVE: %s\n", strerror(errno)); - return ANET_ERR; - } -#else - ((void) interval); /* Avoid unused var warning for non Linux systems. */ #endif return ANET_OK; @@ -239,7 +306,11 @@ int anetRecvTimeout(char *err, int fd, long long ms) { * * If flags is set to ANET_IP_ONLY the function only resolves hostnames * that are actually already IPv4 or IPv6 addresses. This turns the function - * into a validating / normalizing function. */ + * into a validating / normalizing function. + * + * If the flag ANET_PREFER_IPV4 is set, IPv4 is preferred over IPv6. + * If the flag ANET_PREFER_IPV6 is set, IPv6 is preferred over IPv4. + * */ int anetResolve(char *err, char *host, char *ipbuf, size_t ipbuf_len, int flags) { @@ -249,9 +320,20 @@ int anetResolve(char *err, char *host, char *ipbuf, size_t ipbuf_len, memset(&hints,0,sizeof(hints)); if (flags & ANET_IP_ONLY) hints.ai_flags = AI_NUMERICHOST; hints.ai_family = AF_UNSPEC; + if (flags & ANET_PREFER_IPV4 && !(flags & ANET_PREFER_IPV6)) { + hints.ai_family = AF_INET; + } else if (flags & ANET_PREFER_IPV6 && !(flags & ANET_PREFER_IPV4)) { + hints.ai_family = AF_INET6; + } hints.ai_socktype = SOCK_STREAM; /* specify socktype to avoid dups */ - if ((rv = getaddrinfo(host, NULL, &hints, &info)) != 0) { + rv = getaddrinfo(host, NULL, &hints, &info); + if (rv != 0 && hints.ai_family != AF_UNSPEC) { + /* Try the other IP version. */ + hints.ai_family = (hints.ai_family == AF_INET) ? 
AF_INET6 : AF_INET; + rv = getaddrinfo(host, NULL, &hints, &info); + } + if (rv != 0) { anetSetError(err, "%s", gai_strerror(rv)); return ANET_ERR; } @@ -417,13 +499,16 @@ int anetUnixGenericConnect(char *err, const char *path, int flags) return s; } -static int anetListen(char *err, int s, struct sockaddr *sa, socklen_t len, int backlog) { +static int anetListen(char *err, int s, struct sockaddr *sa, socklen_t len, int backlog, mode_t perm) { if (bind(s,sa,len) == -1) { anetSetError(err, "bind: %s", strerror(errno)); close(s); return ANET_ERR; } + if (sa->sa_family == AF_LOCAL && perm) + chmod(((struct sockaddr_un *) sa)->sun_path, perm); + if (listen(s, backlog) == -1) { anetSetError(err, "listen: %s", strerror(errno)); close(s); @@ -467,7 +552,7 @@ static int _anetTcpServer(char *err, int port, char *bindaddr, int af, int backl if (af == AF_INET6 && anetV6Only(err,s) == ANET_ERR) goto error; if (anetSetReuseAddr(err,s) == ANET_ERR) goto error; - if (anetListen(err,s,p->ai_addr,p->ai_addrlen,backlog) == ANET_ERR) s = ANET_ERR; + if (anetListen(err,s,p->ai_addr,p->ai_addrlen,backlog,0) == ANET_ERR) s = ANET_ERR; goto end; } if (p == NULL) { @@ -508,10 +593,8 @@ int anetUnixServer(char *err, char *path, mode_t perm, int backlog) memset(&sa,0,sizeof(sa)); sa.sun_family = AF_LOCAL; redis_strlcpy(sa.sun_path,path,sizeof(sa.sun_path)); - if (anetListen(err,s,(struct sockaddr*)&sa,sizeof(sa),backlog) == ANET_ERR) + if (anetListen(err,s,(struct sockaddr*)&sa,sizeof(sa),backlog,perm) == ANET_ERR) return ANET_ERR; - if (perm) - chmod(sa.sun_path, perm); return s; } diff --git a/src/anet.h b/src/anet.h index b13c14f7758..8ad5f4b0bab 100644 --- a/src/anet.h +++ b/src/anet.h @@ -1,31 +1,10 @@ /* anet.c -- Basic TCP socket stuff made a bit less boring * - * Copyright (c) 2006-2012, Salvatore Sanfilippo + * Copyright (c) 2006-Present, Redis Ltd. * All rights reserved. 
* - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Redis nor the names of its contributors may be used - * to endorse or promote products derived from this software without - * specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. + * Licensed under your choice of the Redis Source Available License 2.0 + * (RSALv2) or the Server Side Public License v1 (SSPLv1). */ #ifndef ANET_H @@ -40,6 +19,8 @@ /* Flags used with certain functions. 
*/ #define ANET_NONE 0 #define ANET_IP_ONLY (1<<0) +#define ANET_PREFER_IPV4 (1<<1) +#define ANET_PREFER_IPV6 (1<<2) #if defined(__sun) || defined(_AIX) #define AF_LOCAL AF_UNIX diff --git a/src/aof.c b/src/aof.c index 468d577f8e9..ec631c0e214 100644 --- a/src/aof.c +++ b/src/aof.c @@ -1,30 +1,9 @@ /* - * Copyright (c) 2009-2012, Salvatore Sanfilippo + * Copyright (c) 2009-Present, Redis Ltd. * All rights reserved. * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Redis nor the names of its contributors may be used - * to endorse or promote products derived from this software without - * specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. 
+ * Licensed under your choice of the Redis Source Available License 2.0 + * (RSALv2) or the Server Side Public License v1 (SSPLv1). */ #include "server.h" @@ -117,7 +96,9 @@ aofInfo *aofInfoDup(aofInfo *orig) { return ai; } -/* Format aofInfo as a string and it will be a line in the manifest. */ +/* Format aofInfo as a string and it will be a line in the manifest. + * + * When update this format, make sure to update redis-check-aof as well. */ sds aofInfoFormat(sds buf, aofInfo *ai) { sds filename_repr = NULL; @@ -833,7 +814,7 @@ int openNewIncrAofForAppend(void) { * is already synced at this point so fsync doesn't matter. */ if (server.aof_fd != -1) { aof_background_fsync_and_close(server.aof_fd); - server.aof_last_fsync = server.unixtime; + server.aof_last_fsync = server.mstime; } server.aof_fd = newfd; @@ -954,7 +935,7 @@ void stopAppendOnly(void) { if (redis_fsync(server.aof_fd) == -1) { serverLog(LL_WARNING,"Fail to fsync the AOF file: %s",strerror(errno)); } else { - server.aof_last_fsync = server.unixtime; + server.aof_last_fsync = server.mstime; } close(server.aof_fd); @@ -976,18 +957,6 @@ void stopAppendOnly(void) { int startAppendOnly(void) { serverAssert(server.aof_state == AOF_OFF); - /* Wait for all bio jobs related to AOF to drain. This prevents a race - * between updates to `fsynced_reploff_pending` of the worker thread, belonging - * to the previous AOF, and the new one. This concern is specific for a full - * sync scenario where we don't wanna risk the ACKed replication offset - * jumping backwards or forward when switching to a different master. 
*/ - bioDrainWorker(BIO_AOF_FSYNC); - - /* Set the initial repl_offset, which will be applied to fsynced_reploff - * when AOFRW finishes (after possibly being updated by a bio thread) */ - atomicSet(server.fsynced_reploff_pending, server.master_repl_offset); - server.fsynced_reploff = 0; - server.aof_state = AOF_WAIT_REWRITE; if (hasActiveChildProcess() && server.child_type != CHILD_TYPE_AOF) { server.aof_rewrite_scheduled = 1; @@ -1010,7 +979,7 @@ int startAppendOnly(void) { return C_ERR; } } - server.aof_last_fsync = server.unixtime; + server.aof_last_fsync = server.mstime; /* If AOF fsync error in bio job, we just ignore it and log the event. */ int aof_bio_fsync_status; atomicGet(server.aof_bio_fsync_status, aof_bio_fsync_status); @@ -1086,7 +1055,7 @@ void flushAppendOnlyFile(int force) { * the data in page cache cannot be flushed in time. */ if (server.aof_fsync == AOF_FSYNC_EVERYSEC && server.aof_last_incr_fsync_offset != server.aof_last_incr_size && - server.unixtime > server.aof_last_fsync && + server.mstime - server.aof_last_fsync >= 1000 && !(sync_in_progress = aofFsyncInProgress())) { goto try_fsync; @@ -1099,6 +1068,13 @@ void flushAppendOnlyFile(int force) { { goto try_fsync; } else { + /* All data is fsync'd already: Update fsynced_reploff_pending just in case. 
+ * This is needed to avoid a WAITAOF hang in case a module used RM_Call with the NO_AOF flag, + * in which case master_repl_offset will increase but fsynced_reploff_pending won't be updated + * (because there's no reason, from the AOF POV, to call fsync) and then WAITAOF may wait on + * the higher offset (which contains data that was only propagated to replicas, and not to AOF) */ + if (!sync_in_progress && server.aof_fsync != AOF_FSYNC_NO) + atomicSet(server.fsynced_reploff_pending, server.master_repl_offset); return; } } @@ -1114,9 +1090,9 @@ void flushAppendOnlyFile(int force) { if (server.aof_flush_postponed_start == 0) { /* No previous write postponing, remember that we are * postponing the flush and return. */ - server.aof_flush_postponed_start = server.unixtime; + server.aof_flush_postponed_start = server.mstime; return; - } else if (server.unixtime - server.aof_flush_postponed_start < 2) { + } else if (server.mstime - server.aof_flush_postponed_start < 2000) { /* We were already waiting for fsync to finish, but for less * than two seconds this is still ok. Postpone again. 
*/ return; @@ -1265,15 +1241,15 @@ void flushAppendOnlyFile(int force) { latencyEndMonitor(latency); latencyAddSampleIfNeeded("aof-fsync-always",latency); server.aof_last_incr_fsync_offset = server.aof_last_incr_size; - server.aof_last_fsync = server.unixtime; + server.aof_last_fsync = server.mstime; atomicSet(server.fsynced_reploff_pending, server.master_repl_offset); } else if (server.aof_fsync == AOF_FSYNC_EVERYSEC && - server.unixtime > server.aof_last_fsync) { + server.mstime - server.aof_last_fsync >= 1000) { if (!sync_in_progress) { aof_background_fsync(server.aof_fd); server.aof_last_incr_fsync_offset = server.aof_last_incr_size; } - server.aof_last_fsync = server.unixtime; + server.aof_last_fsync = server.mstime; } } @@ -1859,6 +1835,7 @@ int rewriteSetObject(rio *r, robj *key, robj *o) { !rioWriteBulkString(r,"SADD",4) || !rioWriteBulkObject(r,key)) { + setTypeReleaseIterator(si); return 0; } } @@ -1962,19 +1939,21 @@ int rewriteSortedSetObject(rio *r, robj *key, robj *o) { * * The function returns 0 on error, non-zero on success. 
*/ static int rioWriteHashIteratorCursor(rio *r, hashTypeIterator *hi, int what) { - if (hi->encoding == OBJ_ENCODING_LISTPACK) { + if ((hi->encoding == OBJ_ENCODING_LISTPACK) || (hi->encoding == OBJ_ENCODING_LISTPACK_EX)) { unsigned char *vstr = NULL; unsigned int vlen = UINT_MAX; long long vll = LLONG_MAX; - hashTypeCurrentFromListpack(hi, what, &vstr, &vlen, &vll); + hashTypeCurrentFromListpack(hi, what, &vstr, &vlen, &vll, NULL); if (vstr) return rioWriteBulkString(r, (char*)vstr, vlen); else return rioWriteBulkLongLong(r, vll); } else if (hi->encoding == OBJ_ENCODING_HT) { - sds value = hashTypeCurrentFromHashTable(hi, what); - return rioWriteBulkString(r, value, sdslen(value)); + char *str; + size_t len; + hashTypeCurrentFromHashTable(hi, what, &str, &len, NULL); + return rioWriteBulkString(r, str, len); } serverPanic("Unknown hash encoding"); @@ -1984,37 +1963,60 @@ static int rioWriteHashIteratorCursor(rio *r, hashTypeIterator *hi, int what) { /* Emit the commands needed to rebuild a hash object. * The function returns 0 on error, 1 on success. */ int rewriteHashObject(rio *r, robj *key, robj *o) { + int res = 0; /*fail*/ + hashTypeIterator *hi; - long long count = 0, items = hashTypeLength(o); + long long count = 0, items = hashTypeLength(o, 0); + int isHFE = hashTypeGetMinExpire(o, 0) != EB_EXPIRE_TIME_INVALID; hi = hashTypeInitIterator(o); - while (hashTypeNext(hi) != C_ERR) { - if (count == 0) { - int cmd_items = (items > AOF_REWRITE_ITEMS_PER_CMD) ? - AOF_REWRITE_ITEMS_PER_CMD : items; - if (!rioWriteBulkCount(r,'*',2+cmd_items*2) || - !rioWriteBulkString(r,"HMSET",5) || - !rioWriteBulkObject(r,key)) - { - hashTypeReleaseIterator(hi); - return 0; + if (!isHFE) { + while (hashTypeNext(hi, 0) != C_ERR) { + if (count == 0) { + int cmd_items = (items > AOF_REWRITE_ITEMS_PER_CMD) ? 
+ AOF_REWRITE_ITEMS_PER_CMD : items; + if (!rioWriteBulkCount(r, '*', 2 + cmd_items * 2) || + !rioWriteBulkString(r, "HMSET", 5) || + !rioWriteBulkObject(r, key)) + goto reHashEnd; } - } - if (!rioWriteHashIteratorCursor(r, hi, OBJ_HASH_KEY) || - !rioWriteHashIteratorCursor(r, hi, OBJ_HASH_VALUE)) - { - hashTypeReleaseIterator(hi); - return 0; + if (!rioWriteHashIteratorCursor(r, hi, OBJ_HASH_KEY) || + !rioWriteHashIteratorCursor(r, hi, OBJ_HASH_VALUE)) + goto reHashEnd; + + if (++count == AOF_REWRITE_ITEMS_PER_CMD) count = 0; + items--; + } + } else { + while (hashTypeNext(hi, 0) != C_ERR) { + + char hmsetCmd[] = "*4\r\n$5\r\nHMSET\r\n"; + if ( (!rioWrite(r, hmsetCmd, sizeof(hmsetCmd) - 1)) || + (!rioWriteBulkObject(r, key)) || + (!rioWriteHashIteratorCursor(r, hi, OBJ_HASH_KEY)) || + (!rioWriteHashIteratorCursor(r, hi, OBJ_HASH_VALUE)) ) + goto reHashEnd; + + if (hi->expire_time != EB_EXPIRE_TIME_INVALID) { + char cmd[] = "*6\r\n$10\r\nHPEXPIREAT\r\n"; + if ( (!rioWrite(r, cmd, sizeof(cmd) - 1)) || + (!rioWriteBulkObject(r, key)) || + (!rioWriteBulkLongLong(r, hi->expire_time)) || + (!rioWriteBulkString(r, "FIELDS", 6)) || + (!rioWriteBulkString(r, "1", 1)) || + (!rioWriteHashIteratorCursor(r, hi, OBJ_HASH_KEY)) ) + goto reHashEnd; + } } - if (++count == AOF_REWRITE_ITEMS_PER_CMD) count = 0; - items--; } - hashTypeReleaseIterator(hi); + res = 1; /* success */ - return 1; +reHashEnd: + hashTypeReleaseIterator(hi); + return res; } /* Helper for rewriteStreamObject() that generates a bulk string into the @@ -2245,11 +2247,11 @@ static int rewriteFunctions(rio *aof) { } int rewriteAppendOnlyFileRio(rio *aof) { - dictIterator *di = NULL; dictEntry *de; int j; long key_count = 0; long long updated_time = 0; + kvstoreIterator *kvs_it = NULL; /* Record timestamp at the beginning of rewriting AOF. 
*/ if (server.aof_timestamp_enabled) { @@ -2262,17 +2264,16 @@ int rewriteAppendOnlyFileRio(rio *aof) { for (j = 0; j < server.dbnum; j++) { char selectcmd[] = "*2\r\n$6\r\nSELECT\r\n"; - redisDb *db = server.db+j; - dict *d = db->dict; - if (dictSize(d) == 0) continue; - di = dictGetSafeIterator(d); + redisDb *db = server.db + j; + if (kvstoreSize(db->keys) == 0) continue; /* SELECT the new DB */ if (rioWrite(aof,selectcmd,sizeof(selectcmd)-1) == 0) goto werr; if (rioWriteBulkLongLong(aof,j) == 0) goto werr; + kvs_it = kvstoreIteratorInit(db->keys); /* Iterate this DB writing every entry */ - while((de = dictNext(di)) != NULL) { + while((de = kvstoreIteratorNext(kvs_it)) != NULL) { sds keystr; robj key, *o; long long expiretime; @@ -2337,13 +2338,12 @@ int rewriteAppendOnlyFileRio(rio *aof) { if (server.rdb_key_save_delay) debugDelay(server.rdb_key_save_delay); } - dictReleaseIterator(di); - di = NULL; + kvstoreIteratorRelease(kvs_it); } return C_OK; werr: - if (di) dictReleaseIterator(di); + if (kvs_it) kvstoreIteratorRelease(kvs_it); return C_ERR; } @@ -2454,7 +2454,23 @@ int rewriteAppendOnlyFileBackground(void) { server.aof_lastbgrewrite_status = C_ERR; return C_ERR; } + + if (server.aof_state == AOF_WAIT_REWRITE) { + /* Wait for all bio jobs related to AOF to drain. This prevents a race + * between updates to `fsynced_reploff_pending` of the worker thread, belonging + * to the previous AOF, and the new one. This concern is specific for a full + * sync scenario where we don't wanna risk the ACKed replication offset + * jumping backwards or forward when switching to a different master. 
*/ + bioDrainWorker(BIO_AOF_FSYNC); + + /* Set the initial repl_offset, which will be applied to fsynced_reploff + * when AOFRW finishes (after possibly being updated by a bio thread) */ + atomicSet(server.fsynced_reploff_pending, server.master_repl_offset); + server.fsynced_reploff = 0; + } + server.stat_aof_rewrites++; + if ((childpid = redisFork(CHILD_TYPE_AOF)) == 0) { char tmpfile[256]; diff --git a/src/asciilogo.h b/src/asciilogo.h index a62f68cf94c..d1778edf329 100644 --- a/src/asciilogo.h +++ b/src/asciilogo.h @@ -1,30 +1,9 @@ /* - * Copyright (c) 2009-2012, Salvatore Sanfilippo + * Copyright (c) 2009-Present, Redis Ltd. * All rights reserved. * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Redis nor the names of its contributors may be used - * to endorse or promote products derived from this software without - * specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. + * Licensed under your choice of the Redis Source Available License 2.0 + * (RSALv2) or the Server Side Public License v1 (SSPLv1). */ const char *ascii_logo = diff --git a/src/atomicvar.h b/src/atomicvar.h index 2c2969c33b9..b8529ba271b 100644 --- a/src/atomicvar.h +++ b/src/atomicvar.h @@ -1,16 +1,41 @@ /* This file implements atomic counters using c11 _Atomic, __atomic or __sync * macros if available, otherwise we will throw an error when compile. * - * The exported interface is composed of three macros: + * The exported interface is composed of the following macros: * * atomicIncr(var,count) -- Increment the atomic counter * atomicGetIncr(var,oldvalue_var,count) -- Get and increment the atomic counter + * atomicIncrGet(var,newvalue_var,count) -- Increment and get the atomic counter new value * atomicDecr(var,count) -- Decrement the atomic counter * atomicGet(var,dstvar) -- Fetch the atomic counter value * atomicSet(var,value) -- Set the atomic counter value * atomicGetWithSync(var,value) -- 'atomicGet' with inter-thread synchronization * atomicSetWithSync(var,value) -- 'atomicSet' with inter-thread synchronization - * + * + * Atomic operations on flags. + * Flag type can be int, long, long long or their unsigned counterparts. + * The value of the flag can be 1 or 0. 
+ * + * atomicFlagGetSet(var,oldvalue_var) -- Get and set the atomic counter value + * + * NOTE1: __atomic* and _Atomic implementations can be actually elaborated to support any value by changing the + * hardcoded new value passed to __atomic_exchange* from 1 to @param count + * i.e. oldvalue_var = atomic_exchange_explicit(&var, count). + * However, in order to be compatible with the __sync functions family, we can use only 0 and 1. + * The only exchange alternative suggested by __sync is __sync_lock_test_and_set, + * but as described by the gnu manual for __sync_lock_test_and_set(): + * https://gcc.gnu.org/onlinedocs/gcc/_005f_005fsync-Builtins.html + * "A target may support reduced functionality here by which the only valid value to store is the immediate constant 1. The exact value + * actually stored in *ptr is implementation defined." + * Hence, we can't rely on it for any value other than 1. + * We eventually chose to implement this method with __sync_val_compare_and_swap since it satisfies functionality needed for atomicFlagGetSet + * (if the flag was 0 -> set to 1, if it's already 1 -> do nothing, but the final result is that the flag is set), + * and also it has a full barrier (__sync_lock_test_and_set has acquire barrier). + * + * NOTE2: Unlike other atomic types, which aren't guaranteed to be lock free, c11 atomic_flag is. + * To check whether a type is lock free, atomic_is_lock_free() can be used. + * It can be considered to limit the flag type to atomic_flag to improve performance. + * * Never use return value from the macros, instead use the AtomicGetIncr() * if you need to get the current value and increment it atomically, like * in the following example: @@ -21,32 +46,11 @@ * * ---------------------------------------------------------------------------- * - * Copyright (c) 2015, Salvatore Sanfilippo + * Copyright (c) 2015-Present, Redis Ltd. * All rights reserved. 
* - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Redis nor the names of its contributors may be used - * to endorse or promote products derived from this software without - * specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. + * Licensed under your choice of the Redis Source Available License 2.0 + * (RSALv2) or the Server Side Public License v1 (SSPLv1). 
*/ #include @@ -93,6 +97,8 @@ #define atomicGetIncr(var,oldvalue_var,count) do { \ oldvalue_var = atomic_fetch_add_explicit(&var,(count),memory_order_relaxed); \ } while(0) +#define atomicIncrGet(var, newvalue_var, count) \ + newvalue_var = atomicIncr(var,count) + count #define atomicDecr(var,count) atomic_fetch_sub_explicit(&var,(count),memory_order_relaxed) #define atomicGet(var,dstvar) do { \ dstvar = atomic_load_explicit(&var,memory_order_relaxed); \ @@ -103,6 +109,8 @@ } while(0) #define atomicSetWithSync(var,value) \ atomic_store_explicit(&var,value,memory_order_seq_cst) +#define atomicFlagGetSet(var,oldvalue_var) \ + oldvalue_var = atomic_exchange_explicit(&var,1,memory_order_relaxed) #define REDIS_ATOMIC_API "c11-builtin" #elif !defined(__ATOMIC_VAR_FORCE_SYNC_MACROS) && \ @@ -111,6 +119,8 @@ /* Implementation using __atomic macros. */ #define atomicIncr(var,count) __atomic_add_fetch(&var,(count),__ATOMIC_RELAXED) +#define atomicIncrGet(var, newvalue_var, count) \ + newvalue_var = __atomic_add_fetch(&var,(count),__ATOMIC_RELAXED) #define atomicGetIncr(var,oldvalue_var,count) do { \ oldvalue_var = __atomic_fetch_add(&var,(count),__ATOMIC_RELAXED); \ } while(0) @@ -124,12 +134,16 @@ } while(0) #define atomicSetWithSync(var,value) \ __atomic_store_n(&var,value,__ATOMIC_SEQ_CST) +#define atomicFlagGetSet(var,oldvalue_var) \ + oldvalue_var = __atomic_exchange_n(&var,1,__ATOMIC_RELAXED) #define REDIS_ATOMIC_API "atomic-builtin" #elif defined(HAVE_ATOMIC) /* Implementation using __sync macros. 
*/ #define atomicIncr(var,count) __sync_add_and_fetch(&var,(count)) +#define atomicIncrGet(var, newvalue_var, count) \ + newvalue_var = __sync_add_and_fetch(&var,(count)) #define atomicGetIncr(var,oldvalue_var,count) do { \ oldvalue_var = __sync_fetch_and_add(&var,(count)); \ } while(0) @@ -149,6 +163,8 @@ ANNOTATE_HAPPENS_BEFORE(&var); \ while(!__sync_bool_compare_and_swap(&var,var,value,__sync_synchronize)); \ } while(0) +#define atomicFlagGetSet(var,oldvalue_var) \ + oldvalue_var = __sync_val_compare_and_swap(&var,0,1) #define REDIS_ATOMIC_API "sync-builtin" #else diff --git a/src/bio.c b/src/bio.c index 10ecf8db294..6f96ef709ee 100644 --- a/src/bio.c +++ b/src/bio.c @@ -1,16 +1,16 @@ /* Background I/O service for Redis. * * This file implements operations that we need to perform in the background. - * Currently there is only a single operation, that is a background close(2) - * system call. This is needed as when the process is the last owner of a - * reference to a file closing it means unlinking it, and the deletion of the - * file is slow, blocking the server. + * Currently there are 3 operations: + * 1) a background close(2) system call. This is needed when the process is + * the last owner of a reference to a file closing it means unlinking it, and + * the deletion of the file is slow, blocking the server. + * 2) AOF fsync + * 3) lazyfree of memory * * In the future we'll either continue implementing new things we need or * we'll switch to libeio. However there are probably long term uses for this - * file as we may want to put here Redis specific background tasks (for instance - * it is not impossible that we'll need a non blocking FLUSHDB/FLUSHALL - * implementation). + * file as we may want to put here Redis specific background tasks. * * DESIGN * ------ @@ -26,42 +26,26 @@ * least-recently-inserted to the most-recently-inserted (older jobs processed * first). 
* - * Currently there is no way for the creator of the job to be notified about - * the completion of the operation, this will only be added when/if needed. + * To let the creator of the job to be notified about the completion of the + * operation, it will need to submit additional dummy job, coined as + * completion job request that will be written back eventually, by the + * background thread, into completion job response queue. This notification + * layout can simplify flows that might submit more than one job, such as + * in case of FLUSHALL which for a single command submits multiple jobs. It + * is also correct because jobs are processed in FIFO fashion. * * ---------------------------------------------------------------------------- * - * Copyright (c) 2009-2012, Salvatore Sanfilippo + * Copyright (c) 2009-Present, Redis Ltd. * All rights reserved. * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Redis nor the names of its contributors may be used - * to endorse or promote products derived from this software without - * specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. + * Licensed under your choice of the Redis Source Available License 2.0 + * (RSALv2) or the Server Side Public License v1 (SSPLv1). */ - #include "server.h" #include "bio.h" +#include static char* bio_worker_title[] = { "bio_close_file", @@ -76,6 +60,9 @@ static unsigned int bio_job_to_worker[] = { [BIO_AOF_FSYNC] = 1, [BIO_CLOSE_AOF] = 1, [BIO_LAZY_FREE] = 2, + [BIO_COMP_RQ_CLOSE_FILE] = 0, + [BIO_COMP_RQ_AOF_FSYNC] = 1, + [BIO_COMP_RQ_LAZY_FREE] = 2 }; static pthread_t bio_threads[BIO_WORKER_NUM]; @@ -84,6 +71,18 @@ static pthread_cond_t bio_newjob_cond[BIO_WORKER_NUM]; static list *bio_jobs[BIO_WORKER_NUM]; static unsigned long bio_jobs_counter[BIO_NUM_OPS] = {0}; +/* The bio_comp_list is used to hold completion job responses and to handover + * to main thread to callback as notification for job completion. Main + * thread will be triggered to read the list by signaling via writing to a pipe */ +static list *bio_comp_list; +static pthread_mutex_t bio_mutex_comp; +static int job_comp_pipe[2]; /* Pipe used to awake the event loop */ + +typedef struct bio_comp_item { + comp_fn *func; /* callback after completion job will be processed */ + uint64_t arg; /* user data to be passed to the function */ +} bio_comp_item; + /* This structure represents a background Job. It is only used locally to this * file as the API does not expose the internals at all. 
*/ typedef union bio_job { @@ -107,9 +106,15 @@ typedef union bio_job { lazy_free_fn *free_fn; /* Function that will free the provided arguments */ void *free_args[]; /* List of arguments to be passed to the free function */ } free_args; + struct { + int type; /* header */ + comp_fn *fn; /* callback. Handover to main thread to cb as notify for job completion */ + uint64_t arg; /* callback arguments */ + } comp_rq; } bio_job; void *bioProcessBackgroundJobs(void *arg); +void bioPipeReadJobCompList(aeEventLoop *el, int fd, void *privdata, int mask); /* Make sure we have enough stack to perform all the things we do in the * main thread. */ @@ -129,6 +134,27 @@ void bioInit(void) { bio_jobs[j] = listCreate(); } + /* init jobs comp responses */ + bio_comp_list = listCreate(); + pthread_mutex_init(&bio_mutex_comp, NULL); + + /* Create a pipe for background thread to be able to wake up the redis main thread. + * Make the pipe non blocking. This is just a best effort aware mechanism + * and we do not want to block not in the read nor in the write half. + * Enable close-on-exec flag on pipes in case of the fork-exec system calls in + * sentinels or redis servers. */ + if (anetPipe(job_comp_pipe, O_CLOEXEC|O_NONBLOCK, O_CLOEXEC|O_NONBLOCK) == -1) { + serverLog(LL_WARNING, + "Can't create the pipe for bio thread: %s", strerror(errno)); + exit(1); + } + + /* Register a readable event for the pipe used to awake the event loop on job completion */ + if (aeCreateFileEvent(server.el, job_comp_pipe[0], AE_READABLE, + bioPipeReadJobCompList, NULL) == AE_ERR) { + serverPanic("Error registering the readable event for the bio pipe."); + } + /* Set the stack size as by default it may be small in some system */ pthread_attr_init(&attr); pthread_attr_getstacksize(&attr,&stacksize); @@ -174,6 +200,28 @@ void bioCreateLazyFreeJob(lazy_free_fn free_fn, int arg_count, ...) 
{ bioSubmitJob(BIO_LAZY_FREE, job); } +void bioCreateCompRq(bio_worker_t assigned_worker, comp_fn *func, uint64_t user_data) { + int type; + switch (assigned_worker) { + case BIO_WORKER_CLOSE_FILE: + type = BIO_COMP_RQ_CLOSE_FILE; + break; + case BIO_WORKER_AOF_FSYNC: + type = BIO_COMP_RQ_AOF_FSYNC; + break; + case BIO_WORKER_LAZY_FREE: + type = BIO_COMP_RQ_LAZY_FREE; + break; + default: + serverPanic("Invalid worker type in bioCreateCompRq()."); + } + + bio_job *job = zmalloc(sizeof(*job)); + job->comp_rq.fn = func; + job->comp_rq.arg = user_data; + bioSubmitJob(type, job); +} + void bioCreateCloseJob(int fd, int need_fsync, int need_reclaim_cache) { bio_job *job = zmalloc(sizeof(*job)); job->fd_args.fd = fd; @@ -285,6 +333,21 @@ void *bioProcessBackgroundJobs(void *arg) { close(job->fd_args.fd); } else if (job_type == BIO_LAZY_FREE) { job->free_args.free_fn(job->free_args.free_args); + } else if ((job_type == BIO_COMP_RQ_CLOSE_FILE) || + (job_type == BIO_COMP_RQ_AOF_FSYNC) || + (job_type == BIO_COMP_RQ_LAZY_FREE)) { + bio_comp_item *comp_rsp = zmalloc(sizeof(bio_comp_item)); + comp_rsp->func = job->comp_rq.fn; + comp_rsp->arg = job->comp_rq.arg; + + /* just write it to completion job responses */ + pthread_mutex_lock(&bio_mutex_comp); + listAddNodeTail(bio_comp_list, comp_rsp); + pthread_mutex_unlock(&bio_mutex_comp); + + if (write(job_comp_pipe[1],"A",1) != 1) { + /* Pipe is non-blocking, write() may fail if it's full. 
*/ + } } else { serverPanic("Wrong job type in bioProcessBackgroundJobs()."); } @@ -343,3 +406,34 @@ void bioKillThreads(void) { } } } + +void bioPipeReadJobCompList(aeEventLoop *el, int fd, void *privdata, int mask) { + UNUSED(el); + UNUSED(mask); + UNUSED(privdata); + + char buf[128]; + list *tmp_list = NULL; + + while (read(fd, buf, sizeof(buf)) == sizeof(buf)); + + /* Handle event loop events if pipe was written from event loop API */ + pthread_mutex_lock(&bio_mutex_comp); + if (listLength(bio_comp_list)) { + tmp_list = bio_comp_list; + bio_comp_list = listCreate(); + } + pthread_mutex_unlock(&bio_mutex_comp); + + if (!tmp_list) return; + + /* callback to all job completions */ + while (listLength(tmp_list)) { + listNode *ln = listFirst(tmp_list); + bio_comp_item *rsp = ln->value; + listDelNode(tmp_list, ln); + rsp->func(rsp->arg); + zfree(rsp); + } + listRelease(tmp_list); +} diff --git a/src/bio.h b/src/bio.h index 0d1fe9b4b9f..2679a2bf550 100644 --- a/src/bio.h +++ b/src/bio.h @@ -1,36 +1,35 @@ /* - * Copyright (c) 2009-2012, Salvatore Sanfilippo + * Copyright (c) 2009-Present, Redis Ltd. * All rights reserved. * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Redis nor the names of its contributors may be used - * to endorse or promote products derived from this software without - * specific prior written permission. 
- * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. + * Licensed under your choice of the Redis Source Available License 2.0 + * (RSALv2) or the Server Side Public License v1 (SSPLv1). */ #ifndef __BIO_H #define __BIO_H typedef void lazy_free_fn(void *args[]); +typedef void comp_fn(uint64_t user_data); + +typedef enum bio_worker_t { + BIO_WORKER_CLOSE_FILE = 0, + BIO_WORKER_AOF_FSYNC, + BIO_WORKER_LAZY_FREE, + BIO_WORKER_NUM +} bio_worker_t; + +/* Background job opcodes */ +typedef enum bio_job_type_t { + BIO_CLOSE_FILE = 0, /* Deferred close(2) syscall. */ + BIO_AOF_FSYNC, /* Deferred AOF fsync. */ + BIO_LAZY_FREE, /* Deferred objects freeing. 
*/ + BIO_CLOSE_AOF, + BIO_COMP_RQ_CLOSE_FILE, /* Job completion request, registered on close-file worker's queue */ + BIO_COMP_RQ_AOF_FSYNC, /* Job completion request, registered on aof-fsync worker's queue */ + BIO_COMP_RQ_LAZY_FREE, /* Job completion request, registered on lazy-free worker's queue */ + BIO_NUM_OPS +} bio_job_type_t; /* Exported API */ void bioInit(void); @@ -41,14 +40,7 @@ void bioCreateCloseJob(int fd, int need_fsync, int need_reclaim_cache); void bioCreateCloseAofJob(int fd, long long offset, int need_reclaim_cache); void bioCreateFsyncJob(int fd, long long offset, int need_reclaim_cache); void bioCreateLazyFreeJob(lazy_free_fn free_fn, int arg_count, ...); +void bioCreateCompRq(bio_worker_t assigned_worker, comp_fn *func, uint64_t user_data); -/* Background job opcodes */ -enum { - BIO_CLOSE_FILE = 0, /* Deferred close(2) syscall. */ - BIO_AOF_FSYNC, /* Deferred AOF fsync. */ - BIO_LAZY_FREE, /* Deferred objects freeing. */ - BIO_CLOSE_AOF, /* Deferred close for AOF files. */ - BIO_NUM_OPS -}; #endif diff --git a/src/bitops.c b/src/bitops.c index 23d80554e44..c0388a15d6c 100644 --- a/src/bitops.c +++ b/src/bitops.c @@ -1,31 +1,10 @@ /* Bit operations. * - * Copyright (c) 2009-2012, Salvatore Sanfilippo + * Copyright (c) 2009-Present, Redis Ltd. * All rights reserved. * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. 
- * * Neither the name of Redis nor the names of its contributors may be used - * to endorse or promote products derived from this software without - * specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. + * Licensed under your choice of the Redis Source Available License 2.0 + * (RSALv2) or the Server Side Public License v1 (SSPLv1). */ #include "server.h" @@ -802,25 +781,12 @@ void bitcountCommand(client *c) { int isbit = 0; unsigned char first_byte_neg_mask = 0, last_byte_neg_mask = 0; - /* Lookup, check for type, and return 0 for non existing keys. */ - if ((o = lookupKeyReadOrReply(c,c->argv[1],shared.czero)) == NULL || - checkType(c,o,OBJ_STRING)) return; - p = getObjectReadOnlyString(o,&strlen,llbuf); - /* Parse start/end range if any. 
*/ if (c->argc == 4 || c->argc == 5) { - long long totlen = strlen; - /* Make sure we will not overflow */ - serverAssert(totlen <= LLONG_MAX >> 3); if (getLongLongFromObjectOrReply(c,c->argv[2],&start,NULL) != C_OK) return; if (getLongLongFromObjectOrReply(c,c->argv[3],&end,NULL) != C_OK) return; - /* Convert negative indexes */ - if (start < 0 && end < 0 && start > end) { - addReply(c,shared.czero); - return; - } if (c->argc == 5) { if (!strcasecmp(c->argv[4]->ptr,"bit")) isbit = 1; else if (!strcasecmp(c->argv[4]->ptr,"byte")) isbit = 0; @@ -829,6 +795,20 @@ void bitcountCommand(client *c) { return; } } + /* Lookup, check for type. */ + o = lookupKeyRead(c->db, c->argv[1]); + if (checkType(c, o, OBJ_STRING)) return; + p = getObjectReadOnlyString(o,&strlen,llbuf); + long long totlen = strlen; + + /* Make sure we will not overflow */ + serverAssert(totlen <= LLONG_MAX >> 3); + + /* Convert negative indexes */ + if (start < 0 && end < 0 && start > end) { + addReply(c,shared.czero); + return; + } if (isbit) totlen <<= 3; if (start < 0) start = totlen+start; if (end < 0) end = totlen+end; @@ -844,6 +824,10 @@ void bitcountCommand(client *c) { end >>= 3; } } else if (c->argc == 2) { + /* Lookup, check for type. */ + o = lookupKeyRead(c->db, c->argv[1]); + if (checkType(c, o, OBJ_STRING)) return; + p = getObjectReadOnlyString(o,&strlen,llbuf); /* The whole string. */ start = 0; end = strlen-1; @@ -853,6 +837,12 @@ void bitcountCommand(client *c) { return; } + /* Return 0 for non existing keys. */ + if (o == NULL) { + addReply(c, shared.czero); + return; + } + /* Precondition: end >= 0 && end < strlen, so the only condition where * zero can be returned is: start > end. */ if (start > end) { @@ -892,21 +882,8 @@ void bitposCommand(client *c) { return; } - /* If the key does not exist, from our point of view it is an infinite - * array of 0 bits. If the user is looking for the first clear bit return 0, - * If the user is looking for the first set bit, return -1. 
*/ - if ((o = lookupKeyRead(c->db,c->argv[1])) == NULL) { - addReplyLongLong(c, bit ? -1 : 0); - return; - } - if (checkType(c,o,OBJ_STRING)) return; - p = getObjectReadOnlyString(o,&strlen,llbuf); - /* Parse start/end range if any. */ if (c->argc == 4 || c->argc == 5 || c->argc == 6) { - long long totlen = strlen; - /* Make sure we will not overflow */ - serverAssert(totlen <= LLONG_MAX >> 3); if (getLongLongFromObjectOrReply(c,c->argv[3],&start,NULL) != C_OK) return; if (c->argc == 6) { @@ -921,10 +898,22 @@ void bitposCommand(client *c) { if (getLongLongFromObjectOrReply(c,c->argv[4],&end,NULL) != C_OK) return; end_given = 1; - } else { + } + + /* Lookup, check for type. */ + o = lookupKeyRead(c->db, c->argv[1]); + if (checkType(c, o, OBJ_STRING)) return; + p = getObjectReadOnlyString(o, &strlen, llbuf); + + /* Make sure we will not overflow */ + long long totlen = strlen; + serverAssert(totlen <= LLONG_MAX >> 3); + + if (c->argc < 5) { if (isbit) end = (totlen<<3) + 7; else end = totlen-1; } + if (isbit) totlen <<= 3; /* Convert negative indexes */ if (start < 0) start = totlen+start; @@ -941,6 +930,11 @@ void bitposCommand(client *c) { end >>= 3; } } else if (c->argc == 3) { + /* Lookup, check for type. */ + o = lookupKeyRead(c->db, c->argv[1]); + if (checkType(c,o,OBJ_STRING)) return; + p = getObjectReadOnlyString(o,&strlen,llbuf); + /* The whole string. */ start = 0; end = strlen-1; @@ -950,6 +944,14 @@ void bitposCommand(client *c) { return; } + /* If the key does not exist, from our point of view it is an infinite + * array of 0 bits. If the user is looking for the first clear bit return 0, + * If the user is looking for the first set bit, return -1. */ + if (o == NULL) { + addReplyLongLong(c, bit ? -1 : 0); + return; + } + /* For empty ranges (start > end) we return -1 as an empty range does * not contain a 0 nor a 1. 
*/ if (start > end) { diff --git a/src/blocked.c b/src/blocked.c index 6ad4667dba5..009e2557b31 100644 --- a/src/blocked.c +++ b/src/blocked.c @@ -1,31 +1,10 @@ /* blocked.c - generic support for blocking operations like BLPOP & WAIT. * - * Copyright (c) 2009-2012, Salvatore Sanfilippo + * Copyright (c) 2009-Present, Redis Ltd. * All rights reserved. * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Redis nor the names of its contributors may be used - * to endorse or promote products derived from this software without - * specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. + * Licensed under your choice of the Redis Source Available License 2.0 + * (RSALv2) or the Server Side Public License v1 (SSPLv1). 
* * --------------------------------------------------------------------------- * @@ -89,6 +68,7 @@ void blockClient(client *c, int btype) { /* Master client should never be blocked unless pause or module */ serverAssert(!(c->flags & CLIENT_MASTER && btype != BLOCKED_MODULE && + btype != BLOCKED_LAZYFREE && btype != BLOCKED_POSTPONE)); c->flags |= CLIENT_BLOCKED; @@ -196,6 +176,8 @@ void unblockClient(client *c, int queue_for_reprocessing) { c->postponed_list_node = NULL; } else if (c->bstate.btype == BLOCKED_SHUTDOWN) { /* No special cleanup. */ + } else if (c->bstate.btype == BLOCKED_LAZYFREE) { + /* No special cleanup. */ } else { serverPanic("Unknown btype in unblockClient()."); } @@ -227,7 +209,9 @@ void unblockClient(client *c, int queue_for_reprocessing) { * send it a reply of some kind. After this function is called, * unblockClient() will be called with the same client as argument. */ void replyToBlockedClientTimedOut(client *c) { - if (c->bstate.btype == BLOCKED_LIST || + if (c->bstate.btype == BLOCKED_LAZYFREE) { + addReply(c, shared.ok); /* No reason lazy-free to fail */ + } else if (c->bstate.btype == BLOCKED_LIST || c->bstate.btype == BLOCKED_ZSET || c->bstate.btype == BLOCKED_STREAM) { addReplyNullArray(c); @@ -239,7 +223,7 @@ void replyToBlockedClientTimedOut(client *c) { addReplyLongLong(c,server.fsynced_reploff >= c->bstate.reploffset); addReplyLongLong(c,replicationCountAOFAcksByOffset(c->bstate.reploffset)); } else if (c->bstate.btype == BLOCKED_MODULE) { - moduleBlockedClientTimedOut(c); + moduleBlockedClientTimedOut(c, 0); } else { serverPanic("Unknown btype in replyToBlockedClientTimedOut()."); } @@ -284,9 +268,16 @@ void disconnectAllBlockedClients(void) { if (c->bstate.btype == BLOCKED_POSTPONE) continue; - unblockClientOnError(c, - "-UNBLOCKED force unblock from blocking operation, " - "instance state changed (master -> replica?)"); + if (c->bstate.btype == BLOCKED_LAZYFREE) { + addReply(c, shared.ok); /* No reason lazy-free to fail */ + 
c->flags &= ~CLIENT_PENDING_COMMAND; + unblockClient(c, 1); + } else { + + unblockClientOnError(c, + "-UNBLOCKED force unblock from blocking operation, " + "instance state changed (master -> replica?)"); + } c->flags |= CLIENT_CLOSE_AFTER_REPLY; } } @@ -370,7 +361,12 @@ void blockForKeys(client *c, int btype, robj **keys, int numkeys, mstime_t timeo list *l; int j; - c->bstate.timeout = timeout; + if (!(c->flags & CLIENT_REPROCESSING_COMMAND)) { + /* If the client is re-processing the command, we do not set the timeout + * because we need to retain the client's original timeout. */ + c->bstate.timeout = timeout; + } + for (j = 0; j < numkeys; j++) { /* If the key already exists in the dictionary ignore it. */ if (!(client_blocked_entry = dictAddRaw(c->bstate.keys,keys[j],NULL))) { @@ -392,7 +388,6 @@ void blockForKeys(client *c, int btype, robj **keys, int numkeys, mstime_t timeo listAddNodeTail(l,c); dictSetVal(c->bstate.keys,client_blocked_entry,listLast(l)); - /* We need to add the key to blocking_keys_unblock_on_nokey, if the client * wants to be awakened if key is deleted (like XREADGROUP) */ if (unblock_on_nokey) { @@ -703,6 +698,9 @@ static void moduleUnblockClientOnKey(client *c, robj *key) { * we want to remove the pending flag to indicate we already responded to the * command with timeout reply. */ void unblockClientOnTimeout(client *c) { + /* The client has been unlocked (in the moduleUnblocked list), return ASAP. 
*/ + if (c->bstate.btype == BLOCKED_MODULE && isModuleClientUnblocked(c)) return; + replyToBlockedClientTimedOut(c); if (c->flags & CLIENT_PENDING_COMMAND) c->flags &= ~CLIENT_PENDING_COMMAND; @@ -720,21 +718,6 @@ void unblockClientOnError(client *c, const char *err_str) { unblockClient(c, 1); } -/* sets blocking_keys to the total number of keys which has at least one client blocked on them - * sets blocking_keys_on_nokey to the total number of keys which has at least one client - * blocked on them to be written or deleted */ -void totalNumberOfBlockingKeys(unsigned long *blocking_keys, unsigned long *bloking_keys_on_nokey) { - unsigned long bkeys=0, bkeys_on_nokey=0; - for (int j = 0; j < server.dbnum; j++) { - bkeys += dictSize(server.db[j].blocking_keys); - bkeys_on_nokey += dictSize(server.db[j].blocking_keys_unblock_on_nokey); - } - if (blocking_keys) - *blocking_keys = bkeys; - if (bloking_keys_on_nokey) - *bloking_keys_on_nokey = bkeys_on_nokey; -} - void blockedBeforeSleep(void) { /* Handle precise timeouts of blocked clients. */ handleBlockedClientsTimeout(); diff --git a/src/call_reply.c b/src/call_reply.c index ccd1b36d457..b246361afce 100644 --- a/src/call_reply.c +++ b/src/call_reply.c @@ -1,30 +1,9 @@ /* - * Copyright (c) 2009-2021, Redis Labs Ltd. + * Copyright (c) 2009-Present, Redis Ltd. * All rights reserved. * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. 
- * * Neither the name of Redis nor the names of its contributors may be used - * to endorse or promote products derived from this software without - * specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. + * Licensed under your choice of the Redis Source Available License 2.0 + * (RSALv2) or the Server Side Public License v1 (SSPLv1). */ #include "server.h" diff --git a/src/call_reply.h b/src/call_reply.h index 657f24735ce..fc7013ea0ac 100644 --- a/src/call_reply.h +++ b/src/call_reply.h @@ -1,30 +1,9 @@ /* - * Copyright (c) 2009-2021, Redis Labs Ltd. + * Copyright (c) 2009-Present, Redis Ltd. * All rights reserved. * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. 
- * * Neither the name of Redis nor the names of its contributors may be used - * to endorse or promote products derived from this software without - * specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. + * Licensed under your choice of the Redis Source Available License 2.0 + * (RSALv2) or the Server Side Public License v1 (SSPLv1). */ #ifndef SRC_CALL_REPLY_H_ diff --git a/src/childinfo.c b/src/childinfo.c index 1303dd04384..eb98d469a89 100644 --- a/src/childinfo.c +++ b/src/childinfo.c @@ -1,30 +1,9 @@ /* - * Copyright (c) 2016, Salvatore Sanfilippo + * Copyright (c) 2016-Present, Redis Ltd. * All rights reserved. * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. 
- * * Neither the name of Redis nor the names of its contributors may be used - * to endorse or promote products derived from this software without - * specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. + * Licensed under your choice of the Redis Source Available License 2.0 + * (RSALv2) or the Server Side Public License v1 (SSPLv1). */ #include "server.h" diff --git a/src/cli_common.c b/src/cli_common.c index 421e7d34a39..65372661e8f 100644 --- a/src/cli_common.c +++ b/src/cli_common.c @@ -1,35 +1,16 @@ /* CLI (command line interface) common methods * - * Copyright (c) 2020, Redis Labs + * Copyright (c) 2020-Present, Redis Ltd. * All rights reserved. * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. 
- * * Neither the name of Redis nor the names of its contributors may be used - * to endorse or promote products derived from this software without - * specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. + * Licensed under your choice of the Redis Source Available License 2.0 + * (RSALv2) or the Server Side Public License v1 (SSPLv1). */ #include "fmacros.h" #include "cli_common.h" +#include "version.h" + #include #include #include @@ -48,6 +29,9 @@ #define UNUSED(V) ((void) V) +char *redisGitSHA1(void); +char *redisGitDirty(void); + /* Wrapper around redisSecureConnection to avoid hiredis_ssl dependencies if * not building with TLS support. */ @@ -406,3 +390,34 @@ sds escapeJsonString(sds s, const char *p, size_t len) { } return sdscatlen(s,"\"",1); } + +sds cliVersion(void) { + sds version = sdscatprintf(sdsempty(), "%s", REDIS_VERSION); + + /* Add git commit and working tree status when available. 
*/ + if (strtoll(redisGitSHA1(),NULL,16)) { + version = sdscatprintf(version, " (git:%s", redisGitSHA1()); + if (strtoll(redisGitDirty(),NULL,10)) + version = sdscatprintf(version, "-dirty"); + version = sdscat(version, ")"); + } + return version; +} + +/* This is a wrapper to call redisConnect or redisConnectWithTimeout. */ +redisContext *redisConnectWrapper(const char *ip, int port, const struct timeval tv) { + if (tv.tv_sec == 0 && tv.tv_usec == 0) { + return redisConnect(ip, port); + } else { + return redisConnectWithTimeout(ip, port, tv); + } +} + +/* This is a wrapper to call redisConnectUnix or redisConnectUnixWithTimeout. */ +redisContext *redisConnectUnixWrapper(const char *path, const struct timeval tv) { + if (tv.tv_sec == 0 && tv.tv_usec == 0) { + return redisConnectUnix(path); + } else { + return redisConnectUnixWithTimeout(path, tv); + } +} diff --git a/src/cli_common.h b/src/cli_common.h index cffdee61d89..a5b8e44a28c 100644 --- a/src/cli_common.h +++ b/src/cli_common.h @@ -51,4 +51,9 @@ void freeCliConnInfo(cliConnInfo connInfo); sds escapeJsonString(sds s, const char *p, size_t len); +sds cliVersion(void); + +redisContext *redisConnectWrapper(const char *ip, int port, const struct timeval tv); +redisContext *redisConnectUnixWrapper(const char *path, const struct timeval tv); + #endif /* __CLICOMMON_H */ diff --git a/src/cluster.c b/src/cluster.c index a390585f3e6..d09a455b3a6 100644 --- a/src/cluster.c +++ b/src/cluster.c @@ -1,6479 +1,91 @@ -/* Redis Cluster implementation. - * - * Copyright (c) 2009-2012, Salvatore Sanfilippo - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. 
- * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Redis nor the names of its contributors may be used - * to endorse or promote products derived from this software without - * specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#include "server.h" -#include "cluster.h" -#include "endianconv.h" -#include "connection.h" - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -/* A global reference to myself is handy to make code more clear. - * Myself always points to server.cluster->myself, that is, the clusterNode - * that represents this node. 
*/ -clusterNode *myself = NULL; - -clusterNode *createClusterNode(char *nodename, int flags); -void clusterAddNode(clusterNode *node); -void clusterAcceptHandler(aeEventLoop *el, int fd, void *privdata, int mask); -void clusterReadHandler(connection *conn); -void clusterSendPing(clusterLink *link, int type); -void clusterSendFail(char *nodename); -void clusterSendFailoverAuthIfNeeded(clusterNode *node, clusterMsg *request); -void clusterUpdateState(void); -int clusterNodeGetSlotBit(clusterNode *n, int slot); -list *clusterGetNodesInMyShard(clusterNode *node); -int clusterNodeAddSlave(clusterNode *master, clusterNode *slave); -int clusterAddSlot(clusterNode *n, int slot); -int clusterDelSlot(int slot); -int clusterDelNodeSlots(clusterNode *node); -int clusterNodeSetSlotBit(clusterNode *n, int slot); -void clusterSetMaster(clusterNode *n); -void clusterHandleSlaveFailover(void); -void clusterHandleSlaveMigration(int max_slaves); -int bitmapTestBit(unsigned char *bitmap, int pos); -void bitmapSetBit(unsigned char *bitmap, int pos); -void bitmapClearBit(unsigned char *bitmap, int pos); -void clusterDoBeforeSleep(int flags); -void clusterSendUpdate(clusterLink *link, clusterNode *node); -void resetManualFailover(void); -void clusterCloseAllSlots(void); -void clusterSetNodeAsMaster(clusterNode *n); -void clusterDelNode(clusterNode *delnode); -sds representClusterNodeFlags(sds ci, uint16_t flags); -sds representSlotInfo(sds ci, uint16_t *slot_info_pairs, int slot_info_pairs_count); -void clusterFreeNodesSlotsInfo(clusterNode *n); -uint64_t clusterGetMaxEpoch(void); -int clusterBumpConfigEpochWithoutConsensus(void); -void moduleCallClusterReceivers(const char *sender_id, uint64_t module_id, uint8_t type, const unsigned char *payload, uint32_t len); -const char *clusterGetMessageTypeString(int type); -void removeChannelsInSlot(unsigned int slot); -unsigned int countKeysInSlot(unsigned int hashslot); -unsigned int countChannelsInSlot(unsigned int hashslot); -unsigned int 
delKeysInSlot(unsigned int hashslot); -void clusterAddNodeToShard(const char *shard_id, clusterNode *node); -list *clusterLookupNodeListByShardId(const char *shard_id); -void clusterRemoveNodeFromShard(clusterNode *node); -int auxShardIdSetter(clusterNode *n, void *value, int length); -sds auxShardIdGetter(clusterNode *n, sds s); -int auxShardIdPresent(clusterNode *n); -int auxHumanNodenameSetter(clusterNode *n, void *value, int length); -sds auxHumanNodenameGetter(clusterNode *n, sds s); -int auxHumanNodenamePresent(clusterNode *n); -int auxTcpPortSetter(clusterNode *n, void *value, int length); -sds auxTcpPortGetter(clusterNode *n, sds s); -int auxTcpPortPresent(clusterNode *n); -int auxTlsPortSetter(clusterNode *n, void *value, int length); -sds auxTlsPortGetter(clusterNode *n, sds s); -int auxTlsPortPresent(clusterNode *n); -static void clusterBuildMessageHdr(clusterMsg *hdr, int type, size_t msglen); - -int getNodeDefaultClientPort(clusterNode *n) { - return server.tls_cluster ? n->tls_port : n->tcp_port; -} - -static inline int getNodeDefaultReplicationPort(clusterNode *n) { - return server.tls_replication ? n->tls_port : n->tcp_port; -} - -static inline int getNodeClientPort(clusterNode *n, int use_tls) { - return use_tls ? n->tls_port : n->tcp_port; -} - -static inline int defaultClientPort(void) { - return server.tls_cluster ? server.tls_port : server.port; -} - -/* Links to the next and previous entries for keys in the same slot are stored - * in the dict entry metadata. See Slot to Key API below. 
*/ -#define dictEntryNextInSlot(de) \ - (((clusterDictEntryMetadata *)dictEntryMetadata(de))->next) -#define dictEntryPrevInSlot(de) \ - (((clusterDictEntryMetadata *)dictEntryMetadata(de))->prev) - -#define isSlotUnclaimed(slot) \ - (server.cluster->slots[slot] == NULL || \ - bitmapTestBit(server.cluster->owner_not_claiming_slot, slot)) - -#define RCVBUF_INIT_LEN 1024 -#define RCVBUF_MAX_PREALLOC (1<<20) /* 1MB */ - -/* Cluster nodes hash table, mapping nodes addresses 1.2.3.4:6379 to - * clusterNode structures. */ -dictType clusterNodesDictType = { - dictSdsHash, /* hash function */ - NULL, /* key dup */ - NULL, /* val dup */ - dictSdsKeyCompare, /* key compare */ - dictSdsDestructor, /* key destructor */ - NULL, /* val destructor */ - NULL /* allow to expand */ -}; - -/* Cluster re-addition blacklist. This maps node IDs to the time - * we can re-add this node. The goal is to avoid reading a removed - * node for some time. */ -dictType clusterNodesBlackListDictType = { - dictSdsCaseHash, /* hash function */ - NULL, /* key dup */ - NULL, /* val dup */ - dictSdsKeyCaseCompare, /* key compare */ - dictSdsDestructor, /* key destructor */ - NULL, /* val destructor */ - NULL /* allow to expand */ -}; - -static ConnectionType *connTypeOfCluster(void) { - if (server.tls_cluster) { - return connectionTypeTls(); - } - - return connectionTypeTcp(); -} -/* Cluster shards hash table, mapping shard id to list of nodes */ -dictType clusterSdsToListType = { - dictSdsHash, /* hash function */ - NULL, /* key dup */ - NULL, /* val dup */ - dictSdsKeyCompare, /* key compare */ - dictSdsDestructor, /* key destructor */ - dictListDestructor, /* val destructor */ - NULL /* allow to expand */ -}; - -/* Aux fields are introduced in Redis 7.2 to support the persistence - * of various important node properties, such as shard id, in nodes.conf. - * Aux fields take an explicit format of name=value pairs and have no - * intrinsic order among them. 
Aux fields are always grouped together - * at the end of the second column of each row after the node's IP - * address/port/cluster_port and the optional hostname. Aux fields - * are separated by ','. */ - -/* Aux field setter function prototype - * return C_OK when the update is successful; C_ERR otherwise */ -typedef int (aux_value_setter) (clusterNode* n, void *value, int length); -/* Aux field getter function prototype - * return an sds that is a concatenation of the input sds string and - * the aux value */ -typedef sds (aux_value_getter) (clusterNode* n, sds s); - -typedef int (aux_value_present) (clusterNode* n); - -typedef struct { - char *field; - aux_value_setter *setter; - aux_value_getter *getter; - aux_value_present *isPresent; -} auxFieldHandler; - -/* Assign index to each aux field */ -typedef enum { - af_shard_id, - af_human_nodename, - af_tcp_port, - af_tls_port, - af_count, -} auxFieldIndex; - -/* Note that - * 1. the order of the elements below must match that of their - * indices as defined in auxFieldIndex - * 2. 
aux name can contain characters that pass the isValidAuxChar check only */ -auxFieldHandler auxFieldHandlers[] = { - {"shard-id", auxShardIdSetter, auxShardIdGetter, auxShardIdPresent}, - {"nodename", auxHumanNodenameSetter, auxHumanNodenameGetter, auxHumanNodenamePresent}, - {"tcp-port", auxTcpPortSetter, auxTcpPortGetter, auxTcpPortPresent}, - {"tls-port", auxTlsPortSetter, auxTlsPortGetter, auxTlsPortPresent}, -}; - -int isValidAuxChar(int c) { - return isalnum(c) || (strchr("!#$%&()*+:;<>?@[]^{|}~", c) == NULL); -} - -int isValidAuxString(char *s, unsigned int length) { - for (unsigned i = 0; i < length; i++) { - if (!isValidAuxChar(s[i])) return 0; - } - return 1; -} - -int auxShardIdSetter(clusterNode *n, void *value, int length) { - if (verifyClusterNodeId(value, length) == C_ERR) { - return C_ERR; - } - memcpy(n->shard_id, value, CLUSTER_NAMELEN); - /* if n already has replicas, make sure they all agree - * on the shard id */ - for (int i = 0; i < n->numslaves; i++) { - if (memcmp(n->slaves[i]->shard_id, n->shard_id, CLUSTER_NAMELEN) != 0) { - return C_ERR; - } - } - clusterAddNodeToShard(value, n); - return C_OK; -} - -sds auxShardIdGetter(clusterNode *n, sds s) { - return sdscatprintf(s, "%.40s", n->shard_id); -} - -int auxShardIdPresent(clusterNode *n) { - return strlen(n->shard_id); -} - -int auxHumanNodenameSetter(clusterNode *n, void *value, int length) { - if (n && !strncmp(value, n->human_nodename, length)) { - return C_OK; - } else if (!n && (length == 0)) { - return C_OK; - } - if (n) { - n->human_nodename = sdscpylen(n->human_nodename, value, length); - } else if (sdslen(n->human_nodename) != 0) { - sdsclear(n->human_nodename); - } else { - return C_ERR; - } - return C_OK; -} - -sds auxHumanNodenameGetter(clusterNode *n, sds s) { - return sdscatprintf(s, "%s", n->human_nodename); -} - -int auxHumanNodenamePresent(clusterNode *n) { - return sdslen(n->human_nodename); -} - -int auxTcpPortSetter(clusterNode *n, void *value, int length) { - if 
(length > 5 || length < 1) { - return C_ERR; - } - char buf[length + 1]; - memcpy(buf, (char*)value, length); - buf[length] = '\0'; - n->tcp_port = atoi(buf); - return (n->tcp_port < 0 || n->tcp_port >= 65536) ? C_ERR : C_OK; -} - -sds auxTcpPortGetter(clusterNode *n, sds s) { - return sdscatprintf(s, "%d", n->tcp_port); -} - -int auxTcpPortPresent(clusterNode *n) { - return n->tcp_port >= 0 && n->tcp_port < 65536; -} - -int auxTlsPortSetter(clusterNode *n, void *value, int length) { - if (length > 5 || length < 1) { - return C_ERR; - } - char buf[length + 1]; - memcpy(buf, (char*)value, length); - buf[length] = '\0'; - n->tls_port = atoi(buf); - return (n->tls_port < 0 || n->tls_port >= 65536) ? C_ERR : C_OK; -} - -sds auxTlsPortGetter(clusterNode *n, sds s) { - return sdscatprintf(s, "%d", n->tls_port); -} - -int auxTlsPortPresent(clusterNode *n) { - return n->tls_port >= 0 && n->tls_port < 65536; -} - -/* clusterLink send queue blocks */ -typedef struct { - size_t totlen; /* Total length of this block including the message */ - int refcount; /* Number of cluster link send msg queues containing the message */ - clusterMsg msg; -} clusterMsgSendBlock; - -/* ----------------------------------------------------------------------------- - * Initialization - * -------------------------------------------------------------------------- */ - -/* Load the cluster config from 'filename'. - * - * If the file does not exist or is zero-length (this may happen because - * when we lock the nodes.conf file, we create a zero-length one for the - * sake of locking if it does not already exist), C_ERR is returned. - * If the configuration was loaded from the file, C_OK is returned. 
*/ -int clusterLoadConfig(char *filename) { - FILE *fp = fopen(filename,"r"); - struct stat sb; - char *line; - int maxline, j; - - if (fp == NULL) { - if (errno == ENOENT) { - return C_ERR; - } else { - serverLog(LL_WARNING, - "Loading the cluster node config from %s: %s", - filename, strerror(errno)); - exit(1); - } - } - - if (redis_fstat(fileno(fp),&sb) == -1) { - serverLog(LL_WARNING, - "Unable to obtain the cluster node config file stat %s: %s", - filename, strerror(errno)); - exit(1); - } - /* Check if the file is zero-length: if so return C_ERR to signal - * we have to write the config. */ - if (sb.st_size == 0) { - fclose(fp); - return C_ERR; - } - - /* Parse the file. Note that single lines of the cluster config file can - * be really long as they include all the hash slots of the node. - * This means in the worst possible case, half of the Redis slots will be - * present in a single line, possibly in importing or migrating state, so - * together with the node ID of the sender/receiver. - * - * To simplify we allocate 1024+CLUSTER_SLOTS*128 bytes per line. */ - maxline = 1024+CLUSTER_SLOTS*128; - line = zmalloc(maxline); - while(fgets(line,maxline,fp) != NULL) { - int argc, aux_argc; - sds *argv, *aux_argv; - clusterNode *n, *master; - char *p, *s; - - /* Skip blank lines, they can be created either by users manually - * editing nodes.conf or by the config writing process if stopped - * before the truncate() call. */ - if (line[0] == '\n' || line[0] == '\0') continue; - - /* Split the line into arguments for processing. */ - argv = sdssplitargs(line,&argc); - if (argv == NULL) goto fmterr; - - /* Handle the special "vars" line. Don't pretend it is the last - * line even if it actually is when generated by Redis. 
*/ - if (strcasecmp(argv[0],"vars") == 0) { - if (!(argc % 2)) goto fmterr; - for (j = 1; j < argc; j += 2) { - if (strcasecmp(argv[j],"currentEpoch") == 0) { - server.cluster->currentEpoch = - strtoull(argv[j+1],NULL,10); - } else if (strcasecmp(argv[j],"lastVoteEpoch") == 0) { - server.cluster->lastVoteEpoch = - strtoull(argv[j+1],NULL,10); - } else { - serverLog(LL_NOTICE, - "Skipping unknown cluster config variable '%s'", - argv[j]); - } - } - sdsfreesplitres(argv,argc); - continue; - } - - /* Regular config lines have at least eight fields */ - if (argc < 8) { - sdsfreesplitres(argv,argc); - goto fmterr; - } - - /* Create this node if it does not exist */ - if (verifyClusterNodeId(argv[0], sdslen(argv[0])) == C_ERR) { - sdsfreesplitres(argv, argc); - goto fmterr; - } - n = clusterLookupNode(argv[0], sdslen(argv[0])); - if (!n) { - n = createClusterNode(argv[0],0); - clusterAddNode(n); - } - /* Format for the node address and auxiliary argument information: - * ip:port[@cport][,hostname][,aux=val]*] */ - - aux_argv = sdssplitlen(argv[1], sdslen(argv[1]), ",", 1, &aux_argc); - if (aux_argv == NULL) { - sdsfreesplitres(argv,argc); - goto fmterr; - } - - /* Hostname is an optional argument that defines the endpoint - * that can be reported to clients instead of IP. */ - if (aux_argc > 1 && sdslen(aux_argv[1]) > 0) { - n->hostname = sdscpy(n->hostname, aux_argv[1]); - } else if (sdslen(n->hostname) != 0) { - sdsclear(n->hostname); - } - - /* All fields after hostname are auxiliary and they take on - * the format of "aux=val" where both aux and val can contain - * characters that pass the isValidAuxChar check only. The order - * of the aux fields is insignificant. 
*/ - int aux_tcp_port = 0; - int aux_tls_port = 0; - for (int i = 2; i < aux_argc; i++) { - int field_argc; - sds *field_argv; - field_argv = sdssplitlen(aux_argv[i], sdslen(aux_argv[i]), "=", 1, &field_argc); - if (field_argv == NULL || field_argc != 2) { - /* Invalid aux field format */ - if (field_argv != NULL) sdsfreesplitres(field_argv, field_argc); - sdsfreesplitres(argv,argc); - goto fmterr; - } - - /* Validate that both aux and value contain valid characters only */ - for (unsigned j = 0; j < 2; j++) { - if (!isValidAuxString(field_argv[j],sdslen(field_argv[j]))){ - /* Invalid aux field format */ - sdsfreesplitres(field_argv, field_argc); - sdsfreesplitres(argv,argc); - goto fmterr; - } - } - - /* Note that we don't expect lots of aux fields in the foreseeable - * future so a linear search is completely fine. */ - int field_found = 0; - for (unsigned j = 0; j < numElements(auxFieldHandlers); j++) { - if (sdslen(field_argv[0]) != strlen(auxFieldHandlers[j].field) || - memcmp(field_argv[0], auxFieldHandlers[j].field, sdslen(field_argv[0])) != 0) { - continue; - } - field_found = 1; - aux_tcp_port |= j == af_tcp_port; - aux_tls_port |= j == af_tls_port; - if (auxFieldHandlers[j].setter(n, field_argv[1], sdslen(field_argv[1])) != C_OK) { - /* Invalid aux field format */ - sdsfreesplitres(field_argv, field_argc); - sdsfreesplitres(argv,argc); - goto fmterr; - } - } - - if (field_found == 0) { - /* Invalid aux field format */ - sdsfreesplitres(field_argv, field_argc); - sdsfreesplitres(argv,argc); - goto fmterr; - } - - sdsfreesplitres(field_argv, field_argc); - } - /* Address and port */ - if ((p = strrchr(aux_argv[0],':')) == NULL) { - sdsfreesplitres(aux_argv, aux_argc); - sdsfreesplitres(argv,argc); - goto fmterr; - } - *p = '\0'; - memcpy(n->ip,aux_argv[0],strlen(aux_argv[0])+1); - char *port = p+1; - char *busp = strchr(port,'@'); - if (busp) { - *busp = '\0'; - busp++; - } - /* If neither TCP or TLS port is found in aux field, it is considered - * an old 
version of nodes.conf file.*/ - if (!aux_tcp_port && !aux_tls_port) { - if (server.tls_cluster) { - n->tls_port = atoi(port); - } else { - n->tcp_port = atoi(port); - } - } else if (!aux_tcp_port) { - n->tcp_port = atoi(port); - } else if (!aux_tls_port) { - n->tls_port = atoi(port); - } - /* In older versions of nodes.conf the "@busport" part is missing. - * In this case we set it to the default offset of 10000 from the - * base port. */ - n->cport = busp ? atoi(busp) : (getNodeDefaultClientPort(n) + CLUSTER_PORT_INCR); - - /* The plaintext port for client in a TLS cluster (n->pport) is not - * stored in nodes.conf. It is received later over the bus protocol. */ - - sdsfreesplitres(aux_argv, aux_argc); - - /* Parse flags */ - p = s = argv[2]; - while(p) { - p = strchr(s,','); - if (p) *p = '\0'; - if (!strcasecmp(s,"myself")) { - serverAssert(server.cluster->myself == NULL); - myself = server.cluster->myself = n; - n->flags |= CLUSTER_NODE_MYSELF; - } else if (!strcasecmp(s,"master")) { - n->flags |= CLUSTER_NODE_MASTER; - } else if (!strcasecmp(s,"slave")) { - n->flags |= CLUSTER_NODE_SLAVE; - } else if (!strcasecmp(s,"fail?")) { - n->flags |= CLUSTER_NODE_PFAIL; - } else if (!strcasecmp(s,"fail")) { - n->flags |= CLUSTER_NODE_FAIL; - n->fail_time = mstime(); - } else if (!strcasecmp(s,"handshake")) { - n->flags |= CLUSTER_NODE_HANDSHAKE; - } else if (!strcasecmp(s,"noaddr")) { - n->flags |= CLUSTER_NODE_NOADDR; - } else if (!strcasecmp(s,"nofailover")) { - n->flags |= CLUSTER_NODE_NOFAILOVER; - } else if (!strcasecmp(s,"noflags")) { - /* nothing to do */ - } else { - serverPanic("Unknown flag in redis cluster config file"); - } - if (p) s = p+1; - } - - /* Get master if any. Set the master and populate master's - * slave list. 
*/ - if (argv[3][0] != '-') { - if (verifyClusterNodeId(argv[3], sdslen(argv[3])) == C_ERR) { - sdsfreesplitres(argv, argc); - goto fmterr; - } - master = clusterLookupNode(argv[3], sdslen(argv[3])); - if (!master) { - master = createClusterNode(argv[3],0); - clusterAddNode(master); - } - /* shard_id can be absent if we are loading a nodes.conf generated - * by an older version of Redis; we should follow the primary's - * shard_id in this case */ - if (auxFieldHandlers[af_shard_id].isPresent(n) == 0) { - memcpy(n->shard_id, master->shard_id, CLUSTER_NAMELEN); - clusterAddNodeToShard(master->shard_id, n); - } else if (clusterGetNodesInMyShard(master) != NULL && - memcmp(master->shard_id, n->shard_id, CLUSTER_NAMELEN) != 0) - { - /* If the primary has been added to a shard, make sure this - * node has the same persisted shard id as the primary. */ - goto fmterr; - } - n->slaveof = master; - clusterNodeAddSlave(master,n); - } else if (auxFieldHandlers[af_shard_id].isPresent(n) == 0) { - /* n is a primary but it does not have a persisted shard_id. - * This happens if we are loading a nodes.conf generated by - * an older version of Redis. We should manually update the - * shard membership in this case */ - clusterAddNodeToShard(n->shard_id, n); - } - - /* Set ping sent / pong received timestamps */ - if (atoi(argv[4])) n->ping_sent = mstime(); - if (atoi(argv[5])) n->pong_received = mstime(); - - /* Set configEpoch for this node. - * If the node is a replica, set its config epoch to 0. - * If it's a primary, load the config epoch from the configuration file. */ - n->configEpoch = (nodeIsSlave(n) && n->slaveof) ? 0 : strtoull(argv[6],NULL,10); - - /* Populate hash slots served by this instance. 
*/ - for (j = 8; j < argc; j++) { - int start, stop; - - if (argv[j][0] == '[') { - /* Here we handle migrating / importing slots */ - int slot; - char direction; - clusterNode *cn; - - p = strchr(argv[j],'-'); - serverAssert(p != NULL); - *p = '\0'; - direction = p[1]; /* Either '>' or '<' */ - slot = atoi(argv[j]+1); - if (slot < 0 || slot >= CLUSTER_SLOTS) { - sdsfreesplitres(argv,argc); - goto fmterr; - } - p += 3; - - char *pr = strchr(p, ']'); - size_t node_len = pr - p; - if (pr == NULL || verifyClusterNodeId(p, node_len) == C_ERR) { - sdsfreesplitres(argv, argc); - goto fmterr; - } - cn = clusterLookupNode(p, CLUSTER_NAMELEN); - if (!cn) { - cn = createClusterNode(p,0); - clusterAddNode(cn); - } - if (direction == '>') { - server.cluster->migrating_slots_to[slot] = cn; - } else { - server.cluster->importing_slots_from[slot] = cn; - } - continue; - } else if ((p = strchr(argv[j],'-')) != NULL) { - *p = '\0'; - start = atoi(argv[j]); - stop = atoi(p+1); - } else { - start = stop = atoi(argv[j]); - } - if (start < 0 || start >= CLUSTER_SLOTS || - stop < 0 || stop >= CLUSTER_SLOTS) - { - sdsfreesplitres(argv,argc); - goto fmterr; - } - while(start <= stop) clusterAddSlot(n, start++); - } - - sdsfreesplitres(argv,argc); - } - /* Config sanity check */ - if (server.cluster->myself == NULL) goto fmterr; - - zfree(line); - fclose(fp); - - serverLog(LL_NOTICE,"Node configuration loaded, I'm %.40s", myself->name); - - /* Something that should never happen: currentEpoch smaller than - * the max epoch found in the nodes configuration. However we handle this - * as some form of protection against manual editing of critical files. 
*/ - if (clusterGetMaxEpoch() > server.cluster->currentEpoch) { - server.cluster->currentEpoch = clusterGetMaxEpoch(); - } - return C_OK; - -fmterr: - serverLog(LL_WARNING, - "Unrecoverable error: corrupted cluster config file \"%s\".", line); - zfree(line); - if (fp) fclose(fp); - exit(1); -} - -/* Cluster node configuration is exactly the same as CLUSTER NODES output. - * - * This function writes the node config and returns 0, on error -1 - * is returned. - * - * Note: we need to write the file in an atomic way from the point of view - * of the POSIX filesystem semantics, so that if the server is stopped - * or crashes during the write, we'll end with either the old file or the - * new one. Since we have the full payload to write available we can use - * a single write to write the whole file. If the pre-existing file was - * bigger we pad our payload with newlines that are anyway ignored and truncate - * the file afterward. */ -int clusterSaveConfig(int do_fsync) { - sds ci,tmpfilename; - size_t content_size,offset = 0; - ssize_t written_bytes; - int fd = -1; - int retval = C_ERR; - - server.cluster->todo_before_sleep &= ~CLUSTER_TODO_SAVE_CONFIG; - - /* Get the nodes description and concatenate our "vars" directive to - * save currentEpoch and lastVoteEpoch. */ - ci = clusterGenNodesDescription(NULL, CLUSTER_NODE_HANDSHAKE, 0); - ci = sdscatprintf(ci,"vars currentEpoch %llu lastVoteEpoch %llu\n", - (unsigned long long) server.cluster->currentEpoch, - (unsigned long long) server.cluster->lastVoteEpoch); - content_size = sdslen(ci); - - /* Create a temp file with the new content. 
*/ - tmpfilename = sdscatfmt(sdsempty(),"%s.tmp-%i-%I", - server.cluster_configfile,(int) getpid(),mstime()); - if ((fd = open(tmpfilename,O_WRONLY|O_CREAT,0644)) == -1) { - serverLog(LL_WARNING,"Could not open temp cluster config file: %s",strerror(errno)); - goto cleanup; - } - - while (offset < content_size) { - written_bytes = write(fd,ci + offset,content_size - offset); - if (written_bytes <= 0) { - if (errno == EINTR) continue; - serverLog(LL_WARNING,"Failed after writing (%zd) bytes to tmp cluster config file: %s", - offset,strerror(errno)); - goto cleanup; - } - offset += written_bytes; - } - - if (do_fsync) { - server.cluster->todo_before_sleep &= ~CLUSTER_TODO_FSYNC_CONFIG; - if (redis_fsync(fd) == -1) { - serverLog(LL_WARNING,"Could not sync tmp cluster config file: %s",strerror(errno)); - goto cleanup; - } - } - - if (rename(tmpfilename, server.cluster_configfile) == -1) { - serverLog(LL_WARNING,"Could not rename tmp cluster config file: %s",strerror(errno)); - goto cleanup; - } - - if (do_fsync) { - if (fsyncFileDir(server.cluster_configfile) == -1) { - serverLog(LL_WARNING,"Could not sync cluster config file dir: %s",strerror(errno)); - goto cleanup; - } - } - retval = C_OK; /* If we reached this point, everything is fine. */ - -cleanup: - if (fd != -1) close(fd); - if (retval) unlink(tmpfilename); - sdsfree(tmpfilename); - sdsfree(ci); - return retval; -} - -void clusterSaveConfigOrDie(int do_fsync) { - if (clusterSaveConfig(do_fsync) == -1) { - serverLog(LL_WARNING,"Fatal: can't update cluster config file."); - exit(1); - } -} - -/* Lock the cluster config using flock(), and retain the file descriptor used to - * acquire the lock so that the file will be locked as long as the process is up. - * - * This works because we always update nodes.conf with a new version - * in-place, reopening the file, and writing to it in place (later adjusting - * the length with ftruncate()). 
- * - * On success C_OK is returned, otherwise an error is logged and - * the function returns C_ERR to signal a lock was not acquired. */ -int clusterLockConfig(char *filename) { -/* flock() does not exist on Solaris - * and a fcntl-based solution won't help, as we constantly re-open that file, - * which will release _all_ locks anyway - */ -#if !defined(__sun) - /* To lock it, we need to open the file in a way it is created if - * it does not exist, otherwise there is a race condition with other - * processes. */ - int fd = open(filename,O_WRONLY|O_CREAT|O_CLOEXEC,0644); - if (fd == -1) { - serverLog(LL_WARNING, - "Can't open %s in order to acquire a lock: %s", - filename, strerror(errno)); - return C_ERR; - } - - if (flock(fd,LOCK_EX|LOCK_NB) == -1) { - if (errno == EWOULDBLOCK) { - serverLog(LL_WARNING, - "Sorry, the cluster configuration file %s is already used " - "by a different Redis Cluster node. Please make sure that " - "different nodes use different cluster configuration " - "files.", filename); - } else { - serverLog(LL_WARNING, - "Impossible to lock %s: %s", filename, strerror(errno)); - } - close(fd); - return C_ERR; - } - /* Lock acquired: leak the 'fd' by not closing it until shutdown time, so that - * we'll retain the lock to the file as long as the process exists. - * - * After fork, the child process will get the fd opened by the parent process, - * we need save `fd` to `cluster_config_file_lock_fd`, so that in redisFork(), - * it will be closed in the child process. - * If it is not closed, when the main process is killed -9, but the child process - * (redis-aof-rewrite) is still alive, the fd(lock) will still be held by the - * child process, and the main process will fail to get lock, means fail to start. */ - server.cluster_config_file_lock_fd = fd; -#else - UNUSED(filename); -#endif /* __sun */ - - return C_OK; -} - -/* Derives our ports to be announced in the cluster bus. 
*/ -void deriveAnnouncedPorts(int *announced_tcp_port, int *announced_tls_port, - int *announced_cport) { - /* Config overriding announced ports. */ - *announced_tcp_port = server.cluster_announce_port ? - server.cluster_announce_port : server.port; - *announced_tls_port = server.cluster_announce_tls_port ? - server.cluster_announce_tls_port : server.tls_port; - /* Derive cluster bus port. */ - if (server.cluster_announce_bus_port) { - *announced_cport = server.cluster_announce_bus_port; - } else if (server.cluster_port) { - *announced_cport = server.cluster_port; - } else { - *announced_cport = defaultClientPort() + CLUSTER_PORT_INCR; - } -} - -/* Some flags (currently just the NOFAILOVER flag) may need to be updated - * in the "myself" node based on the current configuration of the node, - * that may change at runtime via CONFIG SET. This function changes the - * set of flags in myself->flags accordingly. */ -void clusterUpdateMyselfFlags(void) { - if (!myself) return; - int oldflags = myself->flags; - int nofailover = server.cluster_slave_no_failover ? - CLUSTER_NODE_NOFAILOVER : 0; - myself->flags &= ~CLUSTER_NODE_NOFAILOVER; - myself->flags |= nofailover; - if (myself->flags != oldflags) { - clusterDoBeforeSleep(CLUSTER_TODO_SAVE_CONFIG| - CLUSTER_TODO_UPDATE_STATE); - } -} - - -/* We want to take myself->port/cport/pport in sync with the -* cluster-announce-port/cluster-announce-bus-port/cluster-announce-tls-port option. -* The option can be set at runtime via CONFIG SET. */ -void clusterUpdateMyselfAnnouncedPorts(void) { - if (!myself) return; - deriveAnnouncedPorts(&myself->tcp_port,&myself->tls_port,&myself->cport); -} - -/* We want to take myself->ip in sync with the cluster-announce-ip option. -* The option can be set at runtime via CONFIG SET. 
*/ -void clusterUpdateMyselfIp(void) { - if (!myself) return; - static char *prev_ip = NULL; - char *curr_ip = server.cluster_announce_ip; - int changed = 0; - - if (prev_ip == NULL && curr_ip != NULL) changed = 1; - else if (prev_ip != NULL && curr_ip == NULL) changed = 1; - else if (prev_ip && curr_ip && strcmp(prev_ip,curr_ip)) changed = 1; - - if (changed) { - if (prev_ip) zfree(prev_ip); - prev_ip = curr_ip; - - if (curr_ip) { - /* We always take a copy of the previous IP address, by - * duplicating the string. This way later we can check if - * the address really changed. */ - prev_ip = zstrdup(prev_ip); - redis_strlcpy(myself->ip,server.cluster_announce_ip,NET_IP_STR_LEN); - } else { - myself->ip[0] = '\0'; /* Force autodetection. */ - } - } -} - -/* Update the hostname for the specified node with the provided C string. */ -static void updateAnnouncedHostname(clusterNode *node, char *new) { - /* Previous and new hostname are the same, no need to update. */ - if (new && !strcmp(new, node->hostname)) { - return; - } else if (!new && (sdslen(node->hostname) == 0)) { - return; - } - - if (new) { - node->hostname = sdscpy(node->hostname, new); - } else if (sdslen(node->hostname) != 0) { - sdsclear(node->hostname); - } - clusterDoBeforeSleep(CLUSTER_TODO_SAVE_CONFIG); -} - -static void updateAnnouncedHumanNodename(clusterNode *node, char *new) { - if (new && !strcmp(new, node->human_nodename)) { - return; - } else if (!new && (sdslen(node->human_nodename) == 0)) { - return; - } - - if (new) { - node->human_nodename = sdscpy(node->human_nodename, new); - } else if (sdslen(node->human_nodename) != 0) { - sdsclear(node->human_nodename); - } - clusterDoBeforeSleep(CLUSTER_TODO_SAVE_CONFIG); -} - - -static void updateShardId(clusterNode *node, const char *shard_id) { - if (memcmp(node->shard_id, shard_id, CLUSTER_NAMELEN) != 0) { - clusterRemoveNodeFromShard(node); - memcpy(node->shard_id, shard_id, CLUSTER_NAMELEN); - clusterAddNodeToShard(shard_id, node); - 
clusterDoBeforeSleep(CLUSTER_TODO_SAVE_CONFIG); - } - if (myself != node && myself->slaveof == node) { - if (memcmp(myself->shard_id, shard_id, CLUSTER_NAMELEN) != 0) { - /* shard-id can diverge right after a rolling upgrade - * from pre-7.2 releases */ - clusterRemoveNodeFromShard(myself); - memcpy(myself->shard_id, shard_id, CLUSTER_NAMELEN); - clusterAddNodeToShard(shard_id, myself); - clusterDoBeforeSleep(CLUSTER_TODO_SAVE_CONFIG|CLUSTER_TODO_FSYNC_CONFIG); - } - } -} - -/* Update my hostname based on server configuration values */ -void clusterUpdateMyselfHostname(void) { - if (!myself) return; - updateAnnouncedHostname(myself, server.cluster_announce_hostname); -} - -void clusterUpdateMyselfHumanNodename(void) { - if (!myself) return; - updateAnnouncedHumanNodename(myself, server.cluster_announce_human_nodename); -} - -void clusterInit(void) { - int saveconf = 0; - - server.cluster = zmalloc(sizeof(clusterState)); - server.cluster->myself = NULL; - server.cluster->currentEpoch = 0; - server.cluster->state = CLUSTER_FAIL; - server.cluster->size = 1; - server.cluster->todo_before_sleep = 0; - server.cluster->nodes = dictCreate(&clusterNodesDictType); - server.cluster->shards = dictCreate(&clusterSdsToListType); - server.cluster->nodes_black_list = - dictCreate(&clusterNodesBlackListDictType); - server.cluster->failover_auth_time = 0; - server.cluster->failover_auth_count = 0; - server.cluster->failover_auth_rank = 0; - server.cluster->failover_auth_epoch = 0; - server.cluster->cant_failover_reason = CLUSTER_CANT_FAILOVER_NONE; - server.cluster->lastVoteEpoch = 0; - - /* Initialize stats */ - for (int i = 0; i < CLUSTERMSG_TYPE_COUNT; i++) { - server.cluster->stats_bus_messages_sent[i] = 0; - server.cluster->stats_bus_messages_received[i] = 0; - } - server.cluster->stats_pfail_nodes = 0; - server.cluster->stat_cluster_links_buffer_limit_exceeded = 0; - - memset(server.cluster->slots,0, sizeof(server.cluster->slots)); - clusterCloseAllSlots(); - - /* Lock the 
cluster config file to make sure every node uses - * its own nodes.conf. */ - server.cluster_config_file_lock_fd = -1; - if (clusterLockConfig(server.cluster_configfile) == C_ERR) - exit(1); - - /* Load or create a new nodes configuration. */ - if (clusterLoadConfig(server.cluster_configfile) == C_ERR) { - /* No configuration found. We will just use the random name provided - * by the createClusterNode() function. */ - myself = server.cluster->myself = - createClusterNode(NULL,CLUSTER_NODE_MYSELF|CLUSTER_NODE_MASTER); - serverLog(LL_NOTICE,"No cluster configuration found, I'm %.40s", - myself->name); - clusterAddNode(myself); - clusterAddNodeToShard(myself->shard_id, myself); - saveconf = 1; - } - if (saveconf) clusterSaveConfigOrDie(1); - - /* Port sanity check II - * The other handshake port check is triggered too late to stop - * us from trying to use a too-high cluster port number. */ - int port = defaultClientPort(); - if (!server.cluster_port && port > (65535-CLUSTER_PORT_INCR)) { - serverLog(LL_WARNING, "Redis port number too high. " - "Cluster communication port is 10,000 port " - "numbers higher than your Redis port. " - "Your Redis port number must be 55535 or less."); - exit(1); - } - if (!server.bindaddr_count) { - serverLog(LL_WARNING, "No bind address is configured, but it is required for the Cluster bus."); - exit(1); - } - - /* Initialize data for the Slot to key API. */ - slotToKeyInit(server.db); - - /* The slots -> channels map is a radix tree. Initialize it here. */ - server.cluster->slots_to_channels = raxNew(); - - /* Set myself->port/cport/pport to my listening ports, we'll just need to - * discover the IP address via MEET messages. 
*/ - deriveAnnouncedPorts(&myself->tcp_port, &myself->tls_port, &myself->cport); - - server.cluster->mf_end = 0; - server.cluster->mf_slave = NULL; - resetManualFailover(); - clusterUpdateMyselfFlags(); - clusterUpdateMyselfIp(); - clusterUpdateMyselfHostname(); - clusterUpdateMyselfHumanNodename(); -} - -void clusterInitListeners(void) { - if (connectionIndexByType(connTypeOfCluster()->get_type(NULL)) < 0) { - serverLog(LL_WARNING, "Missing connection type %s, but it is required for the Cluster bus.", connTypeOfCluster()->get_type(NULL)); - exit(1); - } - - int port = defaultClientPort(); - connListener *listener = &server.clistener; - listener->count = 0; - listener->bindaddr = server.bindaddr; - listener->bindaddr_count = server.bindaddr_count; - listener->port = server.cluster_port ? server.cluster_port : port + CLUSTER_PORT_INCR; - listener->ct = connTypeOfCluster(); - if (connListen(listener) == C_ERR ) { - /* Note: the following log text is matched by the test suite. */ - serverLog(LL_WARNING, "Failed listening on port %u (cluster), aborting.", listener->port); - exit(1); - } - - if (createSocketAcceptHandler(&server.clistener, clusterAcceptHandler) != C_OK) { - serverPanic("Unrecoverable error creating Redis Cluster socket accept handler."); - } -} - -/* Reset a node performing a soft or hard reset: - * - * 1) All other nodes are forgotten. - * 2) All the assigned / open slots are released. - * 3) If the node is a slave, it turns into a master. - * 4) Only for hard reset: a new Node ID is generated. - * 5) Only for hard reset: currentEpoch and configEpoch are set to 0. - * 6) The new configuration is saved and the cluster state updated. - * 7) If the node was a slave, the whole data set is flushed away. */ -void clusterReset(int hard) { - dictIterator *di; - dictEntry *de; - int j; - - /* Turn into master. 
*/ - if (nodeIsSlave(myself)) { - clusterSetNodeAsMaster(myself); - replicationUnsetMaster(); - emptyData(-1,EMPTYDB_NO_FLAGS,NULL); - } - - /* Close slots, reset manual failover state. */ - clusterCloseAllSlots(); - resetManualFailover(); - - /* Unassign all the slots. */ - for (j = 0; j < CLUSTER_SLOTS; j++) clusterDelSlot(j); - - /* Recreate shards dict */ - dictEmpty(server.cluster->shards, NULL); - - /* Forget all the nodes, but myself. */ - di = dictGetSafeIterator(server.cluster->nodes); - while((de = dictNext(di)) != NULL) { - clusterNode *node = dictGetVal(de); - - if (node == myself) continue; - clusterDelNode(node); - } - dictReleaseIterator(di); - - /* Hard reset only: set epochs to 0, change node ID. */ - if (hard) { - sds oldname; - - server.cluster->currentEpoch = 0; - server.cluster->lastVoteEpoch = 0; - myself->configEpoch = 0; - serverLog(LL_NOTICE, "configEpoch set to 0 via CLUSTER RESET HARD"); - - /* To change the Node ID we need to remove the old name from the - * nodes table, change the ID, and re-add back with new name. */ - oldname = sdsnewlen(myself->name, CLUSTER_NAMELEN); - dictDelete(server.cluster->nodes,oldname); - sdsfree(oldname); - getRandomHexChars(myself->name, CLUSTER_NAMELEN); - getRandomHexChars(myself->shard_id, CLUSTER_NAMELEN); - clusterAddNode(myself); - serverLog(LL_NOTICE,"Node hard reset, now I'm %.40s", myself->name); - } - - /* Re-populate shards */ - clusterAddNodeToShard(myself->shard_id, myself); - - /* Make sure to persist the new config and update the state. 
*/ - clusterDoBeforeSleep(CLUSTER_TODO_SAVE_CONFIG| - CLUSTER_TODO_UPDATE_STATE| - CLUSTER_TODO_FSYNC_CONFIG); -} - -/* ----------------------------------------------------------------------------- - * CLUSTER communication link - * -------------------------------------------------------------------------- */ -static clusterMsgSendBlock *createClusterMsgSendBlock(int type, uint32_t msglen) { - uint32_t blocklen = msglen + sizeof(clusterMsgSendBlock) - sizeof(clusterMsg); - clusterMsgSendBlock *msgblock = zcalloc(blocklen); - msgblock->refcount = 1; - msgblock->totlen = blocklen; - server.stat_cluster_links_memory += blocklen; - clusterBuildMessageHdr(&msgblock->msg,type,msglen); - return msgblock; -} - -static void clusterMsgSendBlockDecrRefCount(void *node) { - clusterMsgSendBlock *msgblock = (clusterMsgSendBlock*)node; - msgblock->refcount--; - serverAssert(msgblock->refcount >= 0); - if (msgblock->refcount == 0) { - server.stat_cluster_links_memory -= msgblock->totlen; - zfree(msgblock); - } -} - -clusterLink *createClusterLink(clusterNode *node) { - clusterLink *link = zmalloc(sizeof(*link)); - link->ctime = mstime(); - link->send_msg_queue = listCreate(); - listSetFreeMethod(link->send_msg_queue, clusterMsgSendBlockDecrRefCount); - link->head_msg_send_offset = 0; - link->send_msg_queue_mem = sizeof(list); - link->rcvbuf = zmalloc(link->rcvbuf_alloc = RCVBUF_INIT_LEN); - link->rcvbuf_len = 0; - server.stat_cluster_links_memory += link->rcvbuf_alloc + link->send_msg_queue_mem; - link->conn = NULL; - link->node = node; - /* Related node can only possibly be known at link creation time if this is an outbound link */ - link->inbound = (node == NULL); - if (!link->inbound) { - node->link = link; - } - return link; -} - -/* Free a cluster link, but does not free the associated node of course. - * This function will just make sure that the original node associated - * with this link will have the 'link' field set to NULL. 
*/ -void freeClusterLink(clusterLink *link) { - if (link->conn) { - connClose(link->conn); - link->conn = NULL; - } - server.stat_cluster_links_memory -= sizeof(list) + listLength(link->send_msg_queue)*sizeof(listNode); - listRelease(link->send_msg_queue); - server.stat_cluster_links_memory -= link->rcvbuf_alloc; - zfree(link->rcvbuf); - if (link->node) { - if (link->node->link == link) { - serverAssert(!link->inbound); - link->node->link = NULL; - } else if (link->node->inbound_link == link) { - serverAssert(link->inbound); - link->node->inbound_link = NULL; - } - } - zfree(link); -} - -void setClusterNodeToInboundClusterLink(clusterNode *node, clusterLink *link) { - serverAssert(!link->node); - serverAssert(link->inbound); - if (node->inbound_link) { - /* A peer may disconnect and then reconnect with us, and it's not guaranteed that - * we would always process the disconnection of the existing inbound link before - * accepting a new existing inbound link. Therefore, it's possible to have more than - * one inbound link from the same node at the same time. Our cleanup logic assumes - * a one to one relationship between nodes and inbound links, so we need to kill - * one of the links. The existing link is more likely the outdated one, but it's - * possible the other node may need to open another link. */ - serverLog(LL_DEBUG, "Replacing inbound link fd %d from node %.40s with fd %d", - node->inbound_link->conn->fd, node->name, link->conn->fd); - freeClusterLink(node->inbound_link); - } - serverAssert(!node->inbound_link); - node->inbound_link = link; - link->node = node; -} - -static void clusterConnAcceptHandler(connection *conn) { - clusterLink *link; - - if (connGetState(conn) != CONN_STATE_CONNECTED) { - serverLog(LL_VERBOSE, - "Error accepting cluster node connection: %s", connGetLastError(conn)); - connClose(conn); - return; - } - - /* Create a link object we use to handle the connection. - * It gets passed to the readable handler when data is available. 
- * Initially the link->node pointer is set to NULL as we don't know - * which node is, but the right node is references once we know the - * node identity. */ - link = createClusterLink(NULL); - link->conn = conn; - connSetPrivateData(conn, link); - - /* Register read handler */ - connSetReadHandler(conn, clusterReadHandler); -} - -#define MAX_CLUSTER_ACCEPTS_PER_CALL 1000 -void clusterAcceptHandler(aeEventLoop *el, int fd, void *privdata, int mask) { - int cport, cfd; - int max = MAX_CLUSTER_ACCEPTS_PER_CALL; - char cip[NET_IP_STR_LEN]; - int require_auth = TLS_CLIENT_AUTH_YES; - UNUSED(el); - UNUSED(mask); - UNUSED(privdata); - - /* If the server is starting up, don't accept cluster connections: - * UPDATE messages may interact with the database content. */ - if (server.masterhost == NULL && server.loading) return; - - while(max--) { - cfd = anetTcpAccept(server.neterr, fd, cip, sizeof(cip), &cport); - if (cfd == ANET_ERR) { - if (errno != EWOULDBLOCK) - serverLog(LL_VERBOSE, - "Error accepting cluster node: %s", server.neterr); - return; - } - - connection *conn = connCreateAccepted(connTypeOfCluster(), cfd, &require_auth); - - /* Make sure connection is not in an error state */ - if (connGetState(conn) != CONN_STATE_ACCEPTING) { - serverLog(LL_VERBOSE, - "Error creating an accepting connection for cluster node: %s", - connGetLastError(conn)); - connClose(conn); - return; - } - connEnableTcpNoDelay(conn); - connKeepAlive(conn,server.cluster_node_timeout / 1000 * 2); - - /* Use non-blocking I/O for cluster messages. */ - serverLog(LL_VERBOSE,"Accepting cluster node connection from %s:%d", cip, cport); - - /* Accept the connection now. connAccept() may call our handler directly - * or schedule it for later depending on connection implementation. 
- */ - if (connAccept(conn, clusterConnAcceptHandler) == C_ERR) { - if (connGetState(conn) == CONN_STATE_ERROR) - serverLog(LL_VERBOSE, - "Error accepting cluster node connection: %s", - connGetLastError(conn)); - connClose(conn); - return; - } - } -} - -/* Return the approximated number of sockets we are using in order to - * take the cluster bus connections. */ -unsigned long getClusterConnectionsCount(void) { - /* We decrement the number of nodes by one, since there is the - * "myself" node too in the list. Each node uses two file descriptors, - * one incoming and one outgoing, thus the multiplication by 2. */ - return server.cluster_enabled ? - ((dictSize(server.cluster->nodes)-1)*2) : 0; -} - -/* ----------------------------------------------------------------------------- - * Key space handling - * -------------------------------------------------------------------------- */ - -/* We have 16384 hash slots. The hash slot of a given key is obtained - * as the least significant 14 bits of the crc16 of the key. - * - * However if the key contains the {...} pattern, only the part between - * { and } is hashed. This may be useful in the future to force certain - * keys to be in the same node (assuming no resharding is in progress). */ -unsigned int keyHashSlot(char *key, int keylen) { - int s, e; /* start-end indexes of { and } */ - - for (s = 0; s < keylen; s++) - if (key[s] == '{') break; - - /* No '{' ? Hash the whole key. This is the base case. */ - if (s == keylen) return crc16(key,keylen) & 0x3FFF; - - /* '{' found? Check if we have the corresponding '}'. */ - for (e = s+1; e < keylen; e++) - if (key[e] == '}') break; - - /* No '}' or nothing between {} ? Hash the whole key. */ - if (e == keylen || e == s+1) return crc16(key,keylen) & 0x3FFF; - - /* If we are here there is both a { and a } on its right. Hash - * what is in the middle between { and }. 
*/ - return crc16(key+s+1,e-s-1) & 0x3FFF; -} - -/* ----------------------------------------------------------------------------- - * CLUSTER node API - * -------------------------------------------------------------------------- */ - -/* Create a new cluster node, with the specified flags. - * If "nodename" is NULL this is considered a first handshake and a random - * node name is assigned to this node (it will be fixed later when we'll - * receive the first pong). - * - * The node is created and returned to the user, but it is not automatically - * added to the nodes hash table. */ -clusterNode *createClusterNode(char *nodename, int flags) { - clusterNode *node = zmalloc(sizeof(*node)); - - if (nodename) - memcpy(node->name, nodename, CLUSTER_NAMELEN); - else - getRandomHexChars(node->name, CLUSTER_NAMELEN); - getRandomHexChars(node->shard_id, CLUSTER_NAMELEN); - node->ctime = mstime(); - node->configEpoch = 0; - node->flags = flags; - memset(node->slots,0,sizeof(node->slots)); - node->slot_info_pairs = NULL; - node->slot_info_pairs_count = 0; - node->numslots = 0; - node->numslaves = 0; - node->slaves = NULL; - node->slaveof = NULL; - node->last_in_ping_gossip = 0; - node->ping_sent = node->pong_received = 0; - node->data_received = 0; - node->fail_time = 0; - node->link = NULL; - node->inbound_link = NULL; - memset(node->ip,0,sizeof(node->ip)); - node->hostname = sdsempty(); - node->human_nodename = sdsempty(); - node->tcp_port = 0; - node->cport = 0; - node->tls_port = 0; - node->fail_reports = listCreate(); - node->voted_time = 0; - node->orphaned_time = 0; - node->repl_offset_time = 0; - node->repl_offset = 0; - listSetFreeMethod(node->fail_reports,zfree); - return node; -} - -/* This function is called every time we get a failure report from a node. - * The side effect is to populate the fail_reports list (or to update - * the timestamp of an existing report). - * - * 'failing' is the node that is in failure state according to the - * 'sender' node. 
- * - * The function returns 0 if it just updates a timestamp of an existing - * failure report from the same sender. 1 is returned if a new failure - * report is created. */ -int clusterNodeAddFailureReport(clusterNode *failing, clusterNode *sender) { - list *l = failing->fail_reports; - listNode *ln; - listIter li; - clusterNodeFailReport *fr; - - /* If a failure report from the same sender already exists, just update - * the timestamp. */ - listRewind(l,&li); - while ((ln = listNext(&li)) != NULL) { - fr = ln->value; - if (fr->node == sender) { - fr->time = mstime(); - return 0; - } - } - - /* Otherwise create a new report. */ - fr = zmalloc(sizeof(*fr)); - fr->node = sender; - fr->time = mstime(); - listAddNodeTail(l,fr); - return 1; -} - -/* Remove failure reports that are too old, where too old means reasonably - * older than the global node timeout. Note that anyway for a node to be - * flagged as FAIL we need to have a local PFAIL state that is at least - * older than the global node timeout, so we don't just trust the number - * of failure reports from other nodes. */ -void clusterNodeCleanupFailureReports(clusterNode *node) { - list *l = node->fail_reports; - listNode *ln; - listIter li; - clusterNodeFailReport *fr; - mstime_t maxtime = server.cluster_node_timeout * - CLUSTER_FAIL_REPORT_VALIDITY_MULT; - mstime_t now = mstime(); - - listRewind(l,&li); - while ((ln = listNext(&li)) != NULL) { - fr = ln->value; - if (now - fr->time > maxtime) listDelNode(l,ln); - } -} - -/* Remove the failing report for 'node' if it was previously considered - * failing by 'sender'. This function is called when a node informs us via - * gossip that a node is OK from its point of view (no FAIL or PFAIL flags). - * - * Note that this function is called relatively often as it gets called even - * when there are no nodes failing, and is O(N), however when the cluster is - * fine the failure reports list is empty so the function runs in constant - * time. 
- * - * The function returns 1 if the failure report was found and removed. - * Otherwise 0 is returned. */ -int clusterNodeDelFailureReport(clusterNode *node, clusterNode *sender) { - list *l = node->fail_reports; - listNode *ln; - listIter li; - clusterNodeFailReport *fr; - - /* Search for a failure report from this sender. */ - listRewind(l,&li); - while ((ln = listNext(&li)) != NULL) { - fr = ln->value; - if (fr->node == sender) break; - } - if (!ln) return 0; /* No failure report from this sender. */ - - /* Remove the failure report. */ - listDelNode(l,ln); - clusterNodeCleanupFailureReports(node); - return 1; -} - -/* Return the number of external nodes that believe 'node' is failing, - * not including this node, that may have a PFAIL or FAIL state for this - * node as well. */ -int clusterNodeFailureReportsCount(clusterNode *node) { - clusterNodeCleanupFailureReports(node); - return listLength(node->fail_reports); -} - -int clusterNodeRemoveSlave(clusterNode *master, clusterNode *slave) { - int j; - - for (j = 0; j < master->numslaves; j++) { - if (master->slaves[j] == slave) { - if ((j+1) < master->numslaves) { - int remaining_slaves = (master->numslaves - j) - 1; - memmove(master->slaves+j,master->slaves+(j+1), - (sizeof(*master->slaves) * remaining_slaves)); - } - master->numslaves--; - if (master->numslaves == 0) - master->flags &= ~CLUSTER_NODE_MIGRATE_TO; - return C_OK; - } - } - return C_ERR; -} - -int clusterNodeAddSlave(clusterNode *master, clusterNode *slave) { - int j; - - /* If it's already a slave, don't add it again. 
*/ - for (j = 0; j < master->numslaves; j++) - if (master->slaves[j] == slave) return C_ERR; - master->slaves = zrealloc(master->slaves, - sizeof(clusterNode*)*(master->numslaves+1)); - master->slaves[master->numslaves] = slave; - master->numslaves++; - master->flags |= CLUSTER_NODE_MIGRATE_TO; - return C_OK; -} - -int clusterCountNonFailingSlaves(clusterNode *n) { - int j, okslaves = 0; - - for (j = 0; j < n->numslaves; j++) - if (!nodeFailed(n->slaves[j])) okslaves++; - return okslaves; -} - -/* Low level cleanup of the node structure. Only called by clusterDelNode(). */ -void freeClusterNode(clusterNode *n) { - sds nodename; - int j; - - /* If the node has associated slaves, we have to set - * all the slaves->slaveof fields to NULL (unknown). */ - for (j = 0; j < n->numslaves; j++) - n->slaves[j]->slaveof = NULL; - - /* Remove this node from the list of slaves of its master. */ - if (nodeIsSlave(n) && n->slaveof) clusterNodeRemoveSlave(n->slaveof,n); - - /* Unlink from the set of nodes. */ - nodename = sdsnewlen(n->name, CLUSTER_NAMELEN); - serverAssert(dictDelete(server.cluster->nodes,nodename) == DICT_OK); - sdsfree(nodename); - sdsfree(n->hostname); - sdsfree(n->human_nodename); - - /* Release links and associated data structures. */ - if (n->link) freeClusterLink(n->link); - if (n->inbound_link) freeClusterLink(n->inbound_link); - listRelease(n->fail_reports); - zfree(n->slaves); - zfree(n); -} - -/* Add a node to the nodes hash table */ -void clusterAddNode(clusterNode *node) { - int retval; - - retval = dictAdd(server.cluster->nodes, - sdsnewlen(node->name,CLUSTER_NAMELEN), node); - serverAssert(retval == DICT_OK); -} - -/* Remove a node from the cluster. The function performs the high level - * cleanup, calling freeClusterNode() for the low level cleanup. - * Here we do the following: - * - * 1) Mark all the slots handled by it as unassigned. - * 2) Remove all the failure reports sent by this node and referenced by - * other nodes. 
- * 3) Remove the node from the owning shard - * 4) Free the node with freeClusterNode() that will in turn remove it - * from the hash table and from the list of slaves of its master, if - * it is a slave node. - */ -void clusterDelNode(clusterNode *delnode) { - int j; - dictIterator *di; - dictEntry *de; - - /* 1) Mark slots as unassigned. */ - for (j = 0; j < CLUSTER_SLOTS; j++) { - if (server.cluster->importing_slots_from[j] == delnode) - server.cluster->importing_slots_from[j] = NULL; - if (server.cluster->migrating_slots_to[j] == delnode) - server.cluster->migrating_slots_to[j] = NULL; - if (server.cluster->slots[j] == delnode) - clusterDelSlot(j); - } - - /* 2) Remove failure reports. */ - di = dictGetSafeIterator(server.cluster->nodes); - while((de = dictNext(di)) != NULL) { - clusterNode *node = dictGetVal(de); - - if (node == delnode) continue; - clusterNodeDelFailureReport(node,delnode); - } - dictReleaseIterator(di); - - /* 3) Remove the node from the owning shard */ - clusterRemoveNodeFromShard(delnode); - - /* 4) Free the node, unlinking it from the cluster. */ - freeClusterNode(delnode); -} - -/* Cluster node sanity check. Returns C_OK if the node id - * is valid an C_ERR otherwise. */ -int verifyClusterNodeId(const char *name, int length) { - if (length != CLUSTER_NAMELEN) return C_ERR; - for (int i = 0; i < length; i++) { - if (name[i] >= 'a' && name[i] <= 'z') continue; - if (name[i] >= '0' && name[i] <= '9') continue; - return C_ERR; - } - return C_OK; -} - -/* Node lookup by name */ -clusterNode *clusterLookupNode(const char *name, int length) { - if (verifyClusterNodeId(name, length) != C_OK) return NULL; - sds s = sdsnewlen(name, length); - dictEntry *de = dictFind(server.cluster->nodes, s); - sdsfree(s); - if (de == NULL) return NULL; - return dictGetVal(de); -} - -/* Get all the nodes in my shard. 
- * Note that the list returned is not computed on the fly - * via slaveof; rather, it is maintained permanently to - * track the shard membership and its life cycle is tied - * to this Redis process. Therefore, the caller must not - * release the list. */ -list *clusterGetNodesInMyShard(clusterNode *node) { - sds s = sdsnewlen(node->shard_id, CLUSTER_NAMELEN); - dictEntry *de = dictFind(server.cluster->shards,s); - sdsfree(s); - return (de != NULL) ? dictGetVal(de) : NULL; -} - -/* This is only used after the handshake. When we connect a given IP/PORT - * as a result of CLUSTER MEET we don't have the node name yet, so we - * pick a random one, and will fix it when we receive the PONG request using - * this function. */ -void clusterRenameNode(clusterNode *node, char *newname) { - int retval; - sds s = sdsnewlen(node->name, CLUSTER_NAMELEN); - - serverLog(LL_DEBUG,"Renaming node %.40s into %.40s", - node->name, newname); - retval = dictDelete(server.cluster->nodes, s); - sdsfree(s); - serverAssert(retval == DICT_OK); - memcpy(node->name, newname, CLUSTER_NAMELEN); - clusterAddNode(node); -} - -void clusterAddNodeToShard(const char *shard_id, clusterNode *node) { - sds s = sdsnewlen(shard_id, CLUSTER_NAMELEN); - dictEntry *de = dictFind(server.cluster->shards,s); - if (de == NULL) { - list *l = listCreate(); - listAddNodeTail(l, node); - serverAssert(dictAdd(server.cluster->shards, s, l) == DICT_OK); - } else { - list *l = dictGetVal(de); - if (listSearchKey(l, node) == NULL) { - listAddNodeTail(l, node); - } - sdsfree(s); - } -} - -void clusterRemoveNodeFromShard(clusterNode *node) { - sds s = sdsnewlen(node->shard_id, CLUSTER_NAMELEN); - dictEntry *de = dictFind(server.cluster->shards, s); - if (de != NULL) { - list *l = dictGetVal(de); - listNode *ln = listSearchKey(l, node); - if (ln != NULL) { - listDelNode(l, ln); - } - if (listLength(l) == 0) { - dictDelete(server.cluster->shards, s); - } - } - sdsfree(s); -} - -/* 
----------------------------------------------------------------------------- - * CLUSTER config epoch handling - * -------------------------------------------------------------------------- */ - -/* Return the greatest configEpoch found in the cluster, or the current - * epoch if greater than any node configEpoch. */ -uint64_t clusterGetMaxEpoch(void) { - uint64_t max = 0; - dictIterator *di; - dictEntry *de; - - di = dictGetSafeIterator(server.cluster->nodes); - while((de = dictNext(di)) != NULL) { - clusterNode *node = dictGetVal(de); - if (node->configEpoch > max) max = node->configEpoch; - } - dictReleaseIterator(di); - if (max < server.cluster->currentEpoch) max = server.cluster->currentEpoch; - return max; -} - -/* If this node epoch is zero or is not already the greatest across the - * cluster (from the POV of the local configuration), this function will: - * - * 1) Generate a new config epoch, incrementing the current epoch. - * 2) Assign the new epoch to this node, WITHOUT any consensus. - * 3) Persist the configuration on disk before sending packets with the - * new configuration. - * - * If the new config epoch is generated and assigned, C_OK is returned, - * otherwise C_ERR is returned (since the node has already the greatest - * configuration around) and no operation is performed. - * - * Important note: this function violates the principle that config epochs - * should be generated with consensus and should be unique across the cluster. - * However Redis Cluster uses this auto-generated new config epochs in two - * cases: - * - * 1) When slots are closed after importing. Otherwise resharding would be - * too expensive. - * 2) When CLUSTER FAILOVER is called with options that force a slave to - * failover its master even if there is not master majority able to - * create a new configuration epoch. 
- * - * Redis Cluster will not explode using this function, even in the case of - * a collision between this node and another node, generating the same - * configuration epoch unilaterally, because the config epoch conflict - * resolution algorithm will eventually move colliding nodes to different - * config epochs. However using this function may violate the "last failover - * wins" rule, so should only be used with care. */ -int clusterBumpConfigEpochWithoutConsensus(void) { - uint64_t maxEpoch = clusterGetMaxEpoch(); - - if (myself->configEpoch == 0 || - myself->configEpoch != maxEpoch) - { - server.cluster->currentEpoch++; - myself->configEpoch = server.cluster->currentEpoch; - clusterDoBeforeSleep(CLUSTER_TODO_SAVE_CONFIG| - CLUSTER_TODO_FSYNC_CONFIG); - serverLog(LL_NOTICE, - "New configEpoch set to %llu", - (unsigned long long) myself->configEpoch); - return C_OK; - } else { - return C_ERR; - } -} - -/* This function is called when this node is a master, and we receive from - * another master a configuration epoch that is equal to our configuration - * epoch. - * - * BACKGROUND - * - * It is not possible that different slaves get the same config - * epoch during a failover election, because the slaves need to get voted - * by a majority. However when we perform a manual resharding of the cluster - * the node will assign a configuration epoch to itself without to ask - * for agreement. Usually resharding happens when the cluster is working well - * and is supervised by the sysadmin, however it is possible for a failover - * to happen exactly while the node we are resharding a slot to assigns itself - * a new configuration epoch, but before it is able to propagate it. - * - * So technically it is possible in this condition that two nodes end with - * the same configuration epoch. - * - * Another possibility is that there are bugs in the implementation causing - * this to happen. 
- * - * Moreover when a new cluster is created, all the nodes start with the same - * configEpoch. This collision resolution code allows nodes to automatically - * end with a different configEpoch at startup automatically. - * - * In all the cases, we want a mechanism that resolves this issue automatically - * as a safeguard. The same configuration epoch for masters serving different - * set of slots is not harmful, but it is if the nodes end serving the same - * slots for some reason (manual errors or software bugs) without a proper - * failover procedure. - * - * In general we want a system that eventually always ends with different - * masters having different configuration epochs whatever happened, since - * nothing is worse than a split-brain condition in a distributed system. - * - * BEHAVIOR - * - * When this function gets called, what happens is that if this node - * has the lexicographically smaller Node ID compared to the other node - * with the conflicting epoch (the 'sender' node), it will assign itself - * the greatest configuration epoch currently detected among nodes plus 1. - * - * This means that even if there are multiple nodes colliding, the node - * with the greatest Node ID never moves forward, so eventually all the nodes - * end with a different configuration epoch. - */ -void clusterHandleConfigEpochCollision(clusterNode *sender) { - /* Prerequisites: nodes have the same configEpoch and are both masters. */ - if (sender->configEpoch != myself->configEpoch || - !nodeIsMaster(sender) || !nodeIsMaster(myself)) return; - /* Don't act if the colliding node has a smaller Node ID. */ - if (memcmp(sender->name,myself->name,CLUSTER_NAMELEN) <= 0) return; - /* Get the next ID available at the best of this node knowledge. */ - server.cluster->currentEpoch++; - myself->configEpoch = server.cluster->currentEpoch; - clusterSaveConfigOrDie(1); - serverLog(LL_VERBOSE, - "WARNING: configEpoch collision with node %.40s (%s)." 
- " configEpoch set to %llu", - sender->name,sender->human_nodename, - (unsigned long long) myself->configEpoch); -} - -/* ----------------------------------------------------------------------------- - * CLUSTER nodes blacklist - * - * The nodes blacklist is just a way to ensure that a given node with a given - * Node ID is not re-added before some time elapsed (this time is specified - * in seconds in CLUSTER_BLACKLIST_TTL). - * - * This is useful when we want to remove a node from the cluster completely: - * when CLUSTER FORGET is called, it also puts the node into the blacklist so - * that even if we receive gossip messages from other nodes that still remember - * about the node we want to remove, we don't re-add it before some time. - * - * Currently the CLUSTER_BLACKLIST_TTL is set to 1 minute, this means - * that redis-cli has 60 seconds to send CLUSTER FORGET messages to nodes - * in the cluster without dealing with the problem of other nodes re-adding - * back the node to nodes we already sent the FORGET command to. - * - * The data structure used is a hash table with an sds string representing - * the node ID as key, and the time when it is ok to re-add the node as - * value. - * -------------------------------------------------------------------------- */ - -#define CLUSTER_BLACKLIST_TTL 60 /* 1 minute. */ - - -/* Before of the addNode() or Exists() operations we always remove expired - * entries from the black list. This is an O(N) operation but it is not a - * problem since add / exists operations are called very infrequently and - * the hash table is supposed to contain very little elements at max. - * However without the cleanup during long uptime and with some automated - * node add/removal procedures, entries could accumulate. 
*/ -void clusterBlacklistCleanup(void) { - dictIterator *di; - dictEntry *de; - - di = dictGetSafeIterator(server.cluster->nodes_black_list); - while((de = dictNext(di)) != NULL) { - int64_t expire = dictGetUnsignedIntegerVal(de); - - if (expire < server.unixtime) - dictDelete(server.cluster->nodes_black_list,dictGetKey(de)); - } - dictReleaseIterator(di); -} - -/* Cleanup the blacklist and add a new node ID to the black list. */ -void clusterBlacklistAddNode(clusterNode *node) { - dictEntry *de; - sds id = sdsnewlen(node->name,CLUSTER_NAMELEN); - - clusterBlacklistCleanup(); - if (dictAdd(server.cluster->nodes_black_list,id,NULL) == DICT_OK) { - /* If the key was added, duplicate the sds string representation of - * the key for the next lookup. We'll free it at the end. */ - id = sdsdup(id); - } - de = dictFind(server.cluster->nodes_black_list,id); - dictSetUnsignedIntegerVal(de,time(NULL)+CLUSTER_BLACKLIST_TTL); - sdsfree(id); -} - -/* Return non-zero if the specified node ID exists in the blacklist. - * You don't need to pass an sds string here, any pointer to 40 bytes - * will work. */ -int clusterBlacklistExists(char *nodeid) { - sds id = sdsnewlen(nodeid,CLUSTER_NAMELEN); - int retval; - - clusterBlacklistCleanup(); - retval = dictFind(server.cluster->nodes_black_list,id) != NULL; - sdsfree(id); - return retval; -} - -/* ----------------------------------------------------------------------------- - * CLUSTER messages exchange - PING/PONG and gossip - * -------------------------------------------------------------------------- */ - -/* This function checks if a given node should be marked as FAIL. - * It happens if the following conditions are met: - * - * 1) We received enough failure reports from other master nodes via gossip. - * Enough means that the majority of the masters signaled the node is - * down recently. - * 2) We believe this node is in PFAIL state. 
- * - * If a failure is detected we also inform the whole cluster about this - * event trying to force every other node to set the FAIL flag for the node. - * - * Note that the form of agreement used here is weak, as we collect the majority - * of masters state during some time, and even if we force agreement by - * propagating the FAIL message, because of partitions we may not reach every - * node. However: - * - * 1) Either we reach the majority and eventually the FAIL state will propagate - * to all the cluster. - * 2) Or there is no majority so no slave promotion will be authorized and the - * FAIL flag will be cleared after some time. - */ -void markNodeAsFailingIfNeeded(clusterNode *node) { - int failures; - int needed_quorum = (server.cluster->size / 2) + 1; - - if (!nodeTimedOut(node)) return; /* We can reach it. */ - if (nodeFailed(node)) return; /* Already FAILing. */ - - failures = clusterNodeFailureReportsCount(node); - /* Also count myself as a voter if I'm a master. */ - if (nodeIsMaster(myself)) failures++; - if (failures < needed_quorum) return; /* No weak agreement from masters. */ - - serverLog(LL_NOTICE, - "Marking node %.40s (%s) as failing (quorum reached).", node->name, node->human_nodename); - - /* Mark the node as failing. */ - node->flags &= ~CLUSTER_NODE_PFAIL; - node->flags |= CLUSTER_NODE_FAIL; - node->fail_time = mstime(); - - /* Broadcast the failing node name to everybody, forcing all the other - * reachable nodes to flag the node as FAIL. - * We do that even if this node is a replica and not a master: anyway - * the failing state is triggered collecting failure reports from masters, - * so here the replica is only helping propagating this status. */ - clusterSendFail(node->name); - clusterDoBeforeSleep(CLUSTER_TODO_UPDATE_STATE|CLUSTER_TODO_SAVE_CONFIG); -} - -/* This function is called only if a node is marked as FAIL, but we are able - * to reach it again. It checks if there are the conditions to undo the FAIL - * state. 
*/ -void clearNodeFailureIfNeeded(clusterNode *node) { - mstime_t now = mstime(); - - serverAssert(nodeFailed(node)); - - /* For slaves we always clear the FAIL flag if we can contact the - * node again. */ - if (nodeIsSlave(node) || node->numslots == 0) { - serverLog(LL_NOTICE, - "Clear FAIL state for node %.40s (%s):%s is reachable again.", - node->name,node->human_nodename, - nodeIsSlave(node) ? "replica" : "master without slots"); - node->flags &= ~CLUSTER_NODE_FAIL; - clusterDoBeforeSleep(CLUSTER_TODO_UPDATE_STATE|CLUSTER_TODO_SAVE_CONFIG); - } - - /* If it is a master and... - * 1) The FAIL state is old enough. - * 2) It is yet serving slots from our point of view (not failed over). - * Apparently no one is going to fix these slots, clear the FAIL flag. */ - if (nodeIsMaster(node) && node->numslots > 0 && - (now - node->fail_time) > - (server.cluster_node_timeout * CLUSTER_FAIL_UNDO_TIME_MULT)) - { - serverLog(LL_NOTICE, - "Clear FAIL state for node %.40s (%s): is reachable again and nobody is serving its slots after some time.", - node->name, node->human_nodename); - node->flags &= ~CLUSTER_NODE_FAIL; - clusterDoBeforeSleep(CLUSTER_TODO_UPDATE_STATE|CLUSTER_TODO_SAVE_CONFIG); - } -} - -/* Return true if we already have a node in HANDSHAKE state matching the - * specified ip address and port number. This function is used in order to - * avoid adding a new handshake node for the same address multiple times. */ -int clusterHandshakeInProgress(char *ip, int port, int cport) { - dictIterator *di; - dictEntry *de; - - di = dictGetSafeIterator(server.cluster->nodes); - while((de = dictNext(di)) != NULL) { - clusterNode *node = dictGetVal(de); - - if (!nodeInHandshake(node)) continue; - if (!strcasecmp(node->ip,ip) && - getNodeDefaultClientPort(node) == port && - node->cport == cport) break; - } - dictReleaseIterator(di); - return de != NULL; -} - -/* Start a handshake with the specified address if there is not one - * already in progress. 
Returns non-zero if the handshake was actually - * started. On error zero is returned and errno is set to one of the - * following values: - * - * EAGAIN - There is already a handshake in progress for this address. - * EINVAL - IP or port are not valid. */ -int clusterStartHandshake(char *ip, int port, int cport) { - clusterNode *n; - char norm_ip[NET_IP_STR_LEN]; - struct sockaddr_storage sa; - - /* IP sanity check */ - if (inet_pton(AF_INET,ip, - &(((struct sockaddr_in *)&sa)->sin_addr))) - { - sa.ss_family = AF_INET; - } else if (inet_pton(AF_INET6,ip, - &(((struct sockaddr_in6 *)&sa)->sin6_addr))) - { - sa.ss_family = AF_INET6; - } else { - errno = EINVAL; - return 0; - } - - /* Port sanity check */ - if (port <= 0 || port > 65535 || cport <= 0 || cport > 65535) { - errno = EINVAL; - return 0; - } - - /* Set norm_ip as the normalized string representation of the node - * IP address. */ - memset(norm_ip,0,NET_IP_STR_LEN); - if (sa.ss_family == AF_INET) - inet_ntop(AF_INET, - (void*)&(((struct sockaddr_in *)&sa)->sin_addr), - norm_ip,NET_IP_STR_LEN); - else - inet_ntop(AF_INET6, - (void*)&(((struct sockaddr_in6 *)&sa)->sin6_addr), - norm_ip,NET_IP_STR_LEN); - - if (clusterHandshakeInProgress(norm_ip,port,cport)) { - errno = EAGAIN; - return 0; - } - - /* Add the node with a random address (NULL as first argument to - * createClusterNode()). Everything will be fixed during the - * handshake. 
*/ - n = createClusterNode(NULL,CLUSTER_NODE_HANDSHAKE|CLUSTER_NODE_MEET); - memcpy(n->ip,norm_ip,sizeof(n->ip)); - if (server.tls_cluster) { - n->tls_port = port; - } else { - n->tcp_port = port; - } - n->cport = cport; - clusterAddNode(n); - return 1; -} - -static void getClientPortFromClusterMsg(clusterMsg *hdr, int *tls_port, int *tcp_port) { - if (server.tls_cluster) { - *tls_port = ntohs(hdr->port); - *tcp_port = ntohs(hdr->pport); - } else { - *tls_port = ntohs(hdr->pport); - *tcp_port = ntohs(hdr->port); - } -} - -static void getClientPortFromGossip(clusterMsgDataGossip *g, int *tls_port, int *tcp_port) { - if (server.tls_cluster) { - *tls_port = ntohs(g->port); - *tcp_port = ntohs(g->pport); - } else { - *tls_port = ntohs(g->pport); - *tcp_port = ntohs(g->port); - } -} - -/* Process the gossip section of PING or PONG packets. - * Note that this function assumes that the packet is already sanity-checked - * by the caller, not in the content of the gossip section, but in the - * length. */ -void clusterProcessGossipSection(clusterMsg *hdr, clusterLink *link) { - uint16_t count = ntohs(hdr->count); - clusterMsgDataGossip *g = (clusterMsgDataGossip*) hdr->data.ping.gossip; - clusterNode *sender = link->node ? link->node : clusterLookupNode(hdr->sender, CLUSTER_NAMELEN); - - while(count--) { - uint16_t flags = ntohs(g->flags); - clusterNode *node; - sds ci; - - if (server.verbosity == LL_DEBUG) { - ci = representClusterNodeFlags(sdsempty(), flags); - serverLog(LL_DEBUG,"GOSSIP %.40s %s:%d@%d %s", - g->nodename, - g->ip, - ntohs(g->port), - ntohs(g->cport), - ci); - sdsfree(ci); - } - - /* Convert port and pport into TCP port and TLS port. */ - int msg_tls_port, msg_tcp_port; - getClientPortFromGossip(g, &msg_tls_port, &msg_tcp_port); - - /* Update our state accordingly to the gossip sections */ - node = clusterLookupNode(g->nodename, CLUSTER_NAMELEN); - if (node) { - /* We already know this node. - Handle failure reports, only when the sender is a master. 
*/ - if (sender && nodeIsMaster(sender) && node != myself) { - if (flags & (CLUSTER_NODE_FAIL|CLUSTER_NODE_PFAIL)) { - if (clusterNodeAddFailureReport(node,sender)) { - serverLog(LL_VERBOSE, - "Node %.40s (%s) reported node %.40s (%s) as not reachable.", - sender->name, sender->human_nodename, node->name, node->human_nodename); - } - markNodeAsFailingIfNeeded(node); - } else { - if (clusterNodeDelFailureReport(node,sender)) { - serverLog(LL_VERBOSE, - "Node %.40s (%s) reported node %.40s (%s) is back online.", - sender->name, sender->human_nodename, node->name, node->human_nodename); - } - } - } - - /* If from our POV the node is up (no failure flags are set), - * we have no pending ping for the node, nor we have failure - * reports for this node, update the last pong time with the - * one we see from the other nodes. */ - if (!(flags & (CLUSTER_NODE_FAIL|CLUSTER_NODE_PFAIL)) && - node->ping_sent == 0 && - clusterNodeFailureReportsCount(node) == 0) - { - mstime_t pongtime = ntohl(g->pong_received); - pongtime *= 1000; /* Convert back to milliseconds. */ - - /* Replace the pong time with the received one only if - * it's greater than our view but is not in the future - * (with 500 milliseconds tolerance) from the POV of our - * clock. */ - if (pongtime <= (server.mstime+500) && - pongtime > node->pong_received) - { - node->pong_received = pongtime; - } - } - - /* If we already know this node, but it is not reachable, and - * we see a different address in the gossip section of a node that - * can talk with this other node, update the address, disconnect - * the old link if any, so that we'll attempt to connect with the - * new address. */ - if (node->flags & (CLUSTER_NODE_FAIL|CLUSTER_NODE_PFAIL) && - !(flags & CLUSTER_NODE_NOADDR) && - !(flags & (CLUSTER_NODE_FAIL|CLUSTER_NODE_PFAIL)) && - (strcasecmp(node->ip,g->ip) || - node->tls_port != (server.tls_cluster ? ntohs(g->port) : ntohs(g->pport)) || - node->tcp_port != (server.tls_cluster ? 
ntohs(g->pport) : ntohs(g->port)) || - node->cport != ntohs(g->cport))) - { - if (node->link) freeClusterLink(node->link); - memcpy(node->ip,g->ip,NET_IP_STR_LEN); - node->tcp_port = msg_tcp_port; - node->tls_port = msg_tls_port; - node->cport = ntohs(g->cport); - node->flags &= ~CLUSTER_NODE_NOADDR; - } - } else { - /* If it's not in NOADDR state and we don't have it, we - * add it to our trusted dict with exact nodeid and flag. - * Note that we cannot simply start a handshake against - * this IP/PORT pairs, since IP/PORT can be reused already, - * otherwise we risk joining another cluster. - * - * Note that we require that the sender of this gossip message - * is a well known node in our cluster, otherwise we risk - * joining another cluster. */ - if (sender && - !(flags & CLUSTER_NODE_NOADDR) && - !clusterBlacklistExists(g->nodename)) - { - clusterNode *node; - node = createClusterNode(g->nodename, flags); - memcpy(node->ip,g->ip,NET_IP_STR_LEN); - node->tcp_port = msg_tcp_port; - node->tls_port = msg_tls_port; - node->cport = ntohs(g->cport); - clusterAddNode(node); - } - } - - /* Next node */ - g++; - } -} - -/* IP -> string conversion. 'buf' is supposed to at least be 46 bytes. - * If 'announced_ip' length is non-zero, it is used instead of extracting - * the IP from the socket peer address. */ -int nodeIp2String(char *buf, clusterLink *link, char *announced_ip) { - if (announced_ip[0] != '\0') { - memcpy(buf,announced_ip,NET_IP_STR_LEN); - buf[NET_IP_STR_LEN-1] = '\0'; /* We are not sure the input is sane. */ - return C_OK; - } else { - if (connAddrPeerName(link->conn, buf, NET_IP_STR_LEN, NULL) == -1) { - serverLog(LL_NOTICE, "Error converting peer IP to string: %s", - link->conn ? connGetLastError(link->conn) : "no link"); - return C_ERR; - } - return C_OK; - } -} - -/* Update the node address to the IP address that can be extracted - * from link->fd, or if hdr->myip is non empty, to the address the node - * is announcing us. 
The port is taken from the packet header as well. - * - * If the address or port changed, disconnect the node link so that we'll - * connect again to the new address. - * - * If the ip/port pair are already correct no operation is performed at - * all. - * - * The function returns 0 if the node address is still the same, - * otherwise 1 is returned. */ -int nodeUpdateAddressIfNeeded(clusterNode *node, clusterLink *link, - clusterMsg *hdr) -{ - char ip[NET_IP_STR_LEN] = {0}; - int cport = ntohs(hdr->cport); - int tcp_port, tls_port; - getClientPortFromClusterMsg(hdr, &tls_port, &tcp_port); - - /* We don't proceed if the link is the same as the sender link, as this - * function is designed to see if the node link is consistent with the - * symmetric link that is used to receive PINGs from the node. - * - * As a side effect this function never frees the passed 'link', so - * it is safe to call during packet processing. */ - if (link == node->link) return 0; - - /* If the peer IP is unavailable for some reasons like invalid fd or closed - * link, just give up the update this time, and the update will be retried - * in the next round of PINGs */ - if (nodeIp2String(ip,link,hdr->myip) == C_ERR) return 0; - - if (node->tcp_port == tcp_port && node->cport == cport && node->tls_port == tls_port && - strcmp(ip,node->ip) == 0) return 0; - - /* IP / port is different, update it. */ - memcpy(node->ip,ip,sizeof(ip)); - node->tcp_port = tcp_port; - node->tls_port = tls_port; - node->cport = cport; - if (node->link) freeClusterLink(node->link); - node->flags &= ~CLUSTER_NODE_NOADDR; - serverLog(LL_NOTICE,"Address updated for node %.40s (%s), now %s:%d", - node->name, node->human_nodename, node->ip, getNodeDefaultClientPort(node)); - - /* Check if this is our master and we have to change the - * replication target as well. 
*/ - if (nodeIsSlave(myself) && myself->slaveof == node) - replicationSetMaster(node->ip, getNodeDefaultReplicationPort(node)); - return 1; -} - -/* Reconfigure the specified node 'n' as a master. This function is called when - * a node that we believed to be a slave is now acting as master in order to - * update the state of the node. */ -void clusterSetNodeAsMaster(clusterNode *n) { - if (nodeIsMaster(n)) return; - - if (n->slaveof) { - clusterNodeRemoveSlave(n->slaveof,n); - if (n != myself) n->flags |= CLUSTER_NODE_MIGRATE_TO; - } - n->flags &= ~CLUSTER_NODE_SLAVE; - n->flags |= CLUSTER_NODE_MASTER; - n->slaveof = NULL; - - /* Update config and state. */ - clusterDoBeforeSleep(CLUSTER_TODO_SAVE_CONFIG| - CLUSTER_TODO_UPDATE_STATE); -} - -/* This function is called when we receive a master configuration via a - * PING, PONG or UPDATE packet. What we receive is a node, a configEpoch of the - * node, and the set of slots claimed under this configEpoch. - * - * What we do is to rebind the slots with newer configuration compared to our - * local configuration, and if needed, we turn ourself into a replica of the - * node (see the function comments for more info). - * - * The 'sender' is the node for which we received a configuration update. - * Sometimes it is not actually the "Sender" of the information, like in the - * case we receive the info via an UPDATE packet. */ -void clusterUpdateSlotsConfigWith(clusterNode *sender, uint64_t senderConfigEpoch, unsigned char *slots) { - int j; - clusterNode *curmaster = NULL, *newmaster = NULL; - /* The dirty slots list is a list of slots for which we lose the ownership - * while having still keys inside. This usually happens after a failover - * or after a manual cluster reconfiguration operated by the admin. - * - * If the update message is not able to demote a master to slave (in this - * case we'll resync with the master updating the whole key space), we - * need to delete all the keys in the slots we lost ownership. 
*/ - uint16_t dirty_slots[CLUSTER_SLOTS]; - int dirty_slots_count = 0; - - /* We should detect if sender is new master of our shard. - * We will know it if all our slots were migrated to sender, and sender - * has no slots except ours */ - int sender_slots = 0; - int migrated_our_slots = 0; - - /* Here we set curmaster to this node or the node this node - * replicates to if it's a slave. In the for loop we are - * interested to check if slots are taken away from curmaster. */ - curmaster = nodeIsMaster(myself) ? myself : myself->slaveof; - - if (sender == myself) { - serverLog(LL_NOTICE,"Discarding UPDATE message about myself."); - return; - } - - for (j = 0; j < CLUSTER_SLOTS; j++) { - if (bitmapTestBit(slots,j)) { - sender_slots++; - - /* The slot is already bound to the sender of this message. */ - if (server.cluster->slots[j] == sender) { - bitmapClearBit(server.cluster->owner_not_claiming_slot, j); - continue; - } - - /* The slot is in importing state, it should be modified only - * manually via redis-cli (example: a resharding is in progress - * and the migrating side slot was already closed and is advertising - * a new config. We still want the slot to be closed manually). */ - if (server.cluster->importing_slots_from[j]) continue; - - /* We rebind the slot to the new node claiming it if: - * 1) The slot was unassigned or the previous owner no longer owns the slot or - * the new node claims it with a greater configEpoch. - * 2) We are not currently importing the slot. */ - if (isSlotUnclaimed(j) || - server.cluster->slots[j]->configEpoch < senderConfigEpoch) - { - /* Was this slot mine, and still contains keys? Mark it as - * a dirty slot. 
*/ - if (server.cluster->slots[j] == myself && - countKeysInSlot(j) && - sender != myself) - { - dirty_slots[dirty_slots_count] = j; - dirty_slots_count++; - } - - if (server.cluster->slots[j] == curmaster) { - newmaster = sender; - migrated_our_slots++; - } - clusterDelSlot(j); - clusterAddSlot(sender,j); - bitmapClearBit(server.cluster->owner_not_claiming_slot, j); - clusterDoBeforeSleep(CLUSTER_TODO_SAVE_CONFIG| - CLUSTER_TODO_UPDATE_STATE| - CLUSTER_TODO_FSYNC_CONFIG); - } - } else if (server.cluster->slots[j] == sender) { - /* The slot is currently bound to the sender but the sender is no longer - * claiming it. We don't want to unbind the slot yet as it can cause the cluster - * to move to FAIL state and also throw client error. Keeping the slot bound to - * the previous owner will cause a few client side redirects, but won't throw - * any errors. We will keep track of the uncertainty in ownership to avoid - * propagating misinformation about this slot's ownership using UPDATE - * messages. */ - bitmapSetBit(server.cluster->owner_not_claiming_slot, j); - } - } - - /* After updating the slots configuration, don't do any actual change - * in the state of the server if a module disabled Redis Cluster - * keys redirections. */ - if (server.cluster_module_flags & CLUSTER_MODULE_FLAG_NO_REDIRECTION) - return; - - /* If at least one slot was reassigned from a node to another node - * with a greater configEpoch, it is possible that: - * 1) We are a master left without slots. This means that we were - * failed over and we should turn into a replica of the new - * master. - * 2) We are a slave and our master is left without slots. We need - * to replicate to the new slots owner. */ - if (newmaster && curmaster->numslots == 0 && - (server.cluster_allow_replica_migration || - sender_slots == migrated_our_slots)) { - serverLog(LL_NOTICE, - "Configuration change detected. 
Reconfiguring myself " - "as a replica of %.40s (%s)", sender->name, sender->human_nodename); - clusterSetMaster(sender); - clusterDoBeforeSleep(CLUSTER_TODO_SAVE_CONFIG| - CLUSTER_TODO_UPDATE_STATE| - CLUSTER_TODO_FSYNC_CONFIG); - } else if (myself->slaveof && myself->slaveof->slaveof && - /* In some rare case when CLUSTER FAILOVER TAKEOVER is used, it - * can happen that myself is a replica of a replica of myself. If - * this happens, we do nothing to avoid a crash and wait for the - * admin to repair the cluster. */ - myself->slaveof->slaveof != myself) - { - /* Safeguard against sub-replicas. A replica's master can turn itself - * into a replica if its last slot is removed. If no other node takes - * over the slot, there is nothing else to trigger replica migration. */ - serverLog(LL_NOTICE, - "I'm a sub-replica! Reconfiguring myself as a replica of grandmaster %.40s (%s)", - myself->slaveof->slaveof->name, myself->slaveof->slaveof->human_nodename); - clusterSetMaster(myself->slaveof->slaveof); - clusterDoBeforeSleep(CLUSTER_TODO_SAVE_CONFIG| - CLUSTER_TODO_UPDATE_STATE| - CLUSTER_TODO_FSYNC_CONFIG); - } else if (dirty_slots_count) { - /* If we are here, we received an update message which removed - * ownership for certain slots we still have keys about, but still - * we are serving some slots, so this master node was not demoted to - * a slave. - * - * In order to maintain a consistent state between keys and slots - * we need to remove all the keys from the slots we lost. */ - for (j = 0; j < dirty_slots_count; j++) - delKeysInSlot(dirty_slots[j]); - } -} - -/* Cluster ping extensions. - * - * The ping/pong/meet messages support arbitrary extensions to add additional - * metadata to the messages that are sent between the various nodes in the - * cluster. 
The extensions take the form: - * [ Header length + type (8 bytes) ] - * [ Extension information (Arbitrary length, but must be 8 byte padded) ] - */ - - -/* Returns the length of a given extension */ -static uint32_t getPingExtLength(clusterMsgPingExt *ext) { - return ntohl(ext->length); -} - -/* Returns the initial position of ping extensions. May return an invalid - * address if there are no ping extensions. */ -static clusterMsgPingExt *getInitialPingExt(clusterMsg *hdr, int count) { - clusterMsgPingExt *initial = (clusterMsgPingExt*) &(hdr->data.ping.gossip[count]); - return initial; -} - -/* Given a current ping extension, returns the start of the next extension. May return - * an invalid address if there are no further ping extensions. */ -static clusterMsgPingExt *getNextPingExt(clusterMsgPingExt *ext) { - clusterMsgPingExt *next = (clusterMsgPingExt *) (((char *) ext) + getPingExtLength(ext)); - return next; -} - -/* All PING extensions must be 8-byte aligned */ -uint32_t getAlignedPingExtSize(uint32_t dataSize) { - - return sizeof(clusterMsgPingExt) + EIGHT_BYTE_ALIGN(dataSize); -} - -uint32_t getHostnamePingExtSize(void) { - if (sdslen(myself->hostname) == 0) { - return 0; - } - return getAlignedPingExtSize(sdslen(myself->hostname) + 1); -} - -uint32_t getHumanNodenamePingExtSize(void) { - if (sdslen(myself->human_nodename) == 0) { - return 0; - } - return getAlignedPingExtSize(sdslen(myself->human_nodename) + 1); -} - -uint32_t getShardIdPingExtSize(void) { - return getAlignedPingExtSize(sizeof(clusterMsgPingExtShardId)); -} - -uint32_t getForgottenNodeExtSize(void) { - return getAlignedPingExtSize(sizeof(clusterMsgPingExtForgottenNode)); -} - -void *preparePingExt(clusterMsgPingExt *ext, uint16_t type, uint32_t length) { - ext->type = htons(type); - ext->length = htonl(length); - return &ext->ext[0]; -} - -clusterMsgPingExt *nextPingExt(clusterMsgPingExt *ext) { - return (clusterMsgPingExt *)((char*)ext + ntohl(ext->length)); -} - -/* 1. 
If a NULL hdr is provided, compute the extension size; - * 2. If a non-NULL hdr is provided, write the hostname ping - * extension at the start of the cursor. This function - * will update the cursor to point to the end of the - * written extension and will return the amount of bytes - * written. */ -uint32_t writePingExt(clusterMsg *hdr, int gossipcount) { - uint16_t extensions = 0; - uint32_t totlen = 0; - clusterMsgPingExt *cursor = NULL; - /* Set the initial extension position */ - if (hdr != NULL) { - cursor = getInitialPingExt(hdr, gossipcount); - } - - /* hostname is optional */ - if (sdslen(myself->hostname) != 0) { - if (cursor != NULL) { - /* Populate hostname */ - clusterMsgPingExtHostname *ext = preparePingExt(cursor, CLUSTERMSG_EXT_TYPE_HOSTNAME, getHostnamePingExtSize()); - memcpy(ext->hostname, myself->hostname, sdslen(myself->hostname)); - - /* Move the write cursor */ - cursor = nextPingExt(cursor); - } - - totlen += getHostnamePingExtSize(); - extensions++; - } - - if (sdslen(myself->human_nodename) != 0) { - if (cursor != NULL) { - /* Populate human_nodename */ - clusterMsgPingExtHumanNodename *ext = preparePingExt(cursor, CLUSTERMSG_EXT_TYPE_HUMAN_NODENAME, getHumanNodenamePingExtSize()); - memcpy(ext->human_nodename, myself->human_nodename, sdslen(myself->human_nodename)); - - /* Move the write cursor */ - cursor = nextPingExt(cursor); - } - - totlen += getHumanNodenamePingExtSize(); - extensions++; - } - - /* Gossip forgotten nodes */ - if (dictSize(server.cluster->nodes_black_list) > 0) { - dictIterator *di = dictGetIterator(server.cluster->nodes_black_list); - dictEntry *de; - while ((de = dictNext(di)) != NULL) { - if (cursor != NULL) { - uint64_t expire = dictGetUnsignedIntegerVal(de); - if ((time_t)expire < server.unixtime) continue; /* already expired */ - uint64_t ttl = expire - server.unixtime; - clusterMsgPingExtForgottenNode *ext = preparePingExt(cursor, CLUSTERMSG_EXT_TYPE_FORGOTTEN_NODE, getForgottenNodeExtSize()); - 
memcpy(ext->name, dictGetKey(de), CLUSTER_NAMELEN); - ext->ttl = htonu64(ttl); - - /* Move the write cursor */ - cursor = nextPingExt(cursor); - } - totlen += getForgottenNodeExtSize(); - extensions++; - } - dictReleaseIterator(di); - } - - /* Populate shard_id */ - if (cursor != NULL) { - clusterMsgPingExtShardId *ext = preparePingExt(cursor, CLUSTERMSG_EXT_TYPE_SHARDID, getShardIdPingExtSize()); - memcpy(ext->shard_id, myself->shard_id, CLUSTER_NAMELEN); - - /* Move the write cursor */ - cursor = nextPingExt(cursor); - } - totlen += getShardIdPingExtSize(); - extensions++; - - if (hdr != NULL) { - if (extensions != 0) { - hdr->mflags[0] |= CLUSTERMSG_FLAG0_EXT_DATA; - } - hdr->extensions = htons(extensions); - } - - return totlen; -} - -/* We previously validated the extensions, so this function just needs to - * handle the extensions. */ -void clusterProcessPingExtensions(clusterMsg *hdr, clusterLink *link) { - clusterNode *sender = link->node ? link->node : clusterLookupNode(hdr->sender, CLUSTER_NAMELEN); - char *ext_hostname = NULL; - char *ext_humannodename = NULL; - char *ext_shardid = NULL; - uint16_t extensions = ntohs(hdr->extensions); - /* Loop through all the extensions and process them */ - clusterMsgPingExt *ext = getInitialPingExt(hdr, ntohs(hdr->count)); - while (extensions--) { - uint16_t type = ntohs(ext->type); - if (type == CLUSTERMSG_EXT_TYPE_HOSTNAME) { - clusterMsgPingExtHostname *hostname_ext = (clusterMsgPingExtHostname *) &(ext->ext[0].hostname); - ext_hostname = hostname_ext->hostname; - } else if (type == CLUSTERMSG_EXT_TYPE_HUMAN_NODENAME) { - clusterMsgPingExtHumanNodename *humannodename_ext = (clusterMsgPingExtHumanNodename *) &(ext->ext[0].human_nodename); - ext_humannodename = humannodename_ext->human_nodename; - } else if (type == CLUSTERMSG_EXT_TYPE_FORGOTTEN_NODE) { - clusterMsgPingExtForgottenNode *forgotten_node_ext = &(ext->ext[0].forgotten_node); - clusterNode *n = clusterLookupNode(forgotten_node_ext->name, CLUSTER_NAMELEN); 
- if (n && n != myself && !(nodeIsSlave(myself) && myself->slaveof == n)) { - sds id = sdsnewlen(forgotten_node_ext->name, CLUSTER_NAMELEN); - dictEntry *de = dictAddRaw(server.cluster->nodes_black_list, id, NULL); - serverAssert(de != NULL); - uint64_t expire = server.unixtime + ntohu64(forgotten_node_ext->ttl); - dictSetUnsignedIntegerVal(de, expire); - clusterDelNode(n); - clusterDoBeforeSleep(CLUSTER_TODO_UPDATE_STATE| - CLUSTER_TODO_SAVE_CONFIG); - } - } else if (type == CLUSTERMSG_EXT_TYPE_SHARDID) { - clusterMsgPingExtShardId *shardid_ext = (clusterMsgPingExtShardId *) &(ext->ext[0].shard_id); - ext_shardid = shardid_ext->shard_id; - } else { - /* Unknown type, we will ignore it but log what happened. */ - serverLog(LL_WARNING, "Received unknown extension type %d", type); - } - - /* We know this will be valid since we validated it ahead of time */ - ext = getNextPingExt(ext); - } - /* If the node did not send us a hostname extension, assume - * they don't have an announced hostname. Otherwise, we'll - * set it now. */ - updateAnnouncedHostname(sender, ext_hostname); - updateAnnouncedHumanNodename(sender, ext_humannodename); - updateShardId(sender, ext_shardid); -} - -static clusterNode *getNodeFromLinkAndMsg(clusterLink *link, clusterMsg *hdr) { - clusterNode *sender; - if (link->node && !nodeInHandshake(link->node)) { - /* If the link has an associated node, use that so that we don't have to look it - * up every time, except when the node is still in handshake, the node still has - * a random name thus not truly "known". */ - sender = link->node; - } else { - /* Otherwise, fetch sender based on the message */ - sender = clusterLookupNode(hdr->sender, CLUSTER_NAMELEN); - /* We know the sender node but haven't associate it with the link. This must - * be an inbound link because only for inbound links we didn't know which node - * to associate when they were created. 
*/ - if (sender && !link->node) { - setClusterNodeToInboundClusterLink(sender, link); - } - } - return sender; -} - -/* When this function is called, there is a packet to process starting - * at link->rcvbuf. Releasing the buffer is up to the caller, so this - * function should just handle the higher level stuff of processing the - * packet, modifying the cluster state if needed. - * - * The function returns 1 if the link is still valid after the packet - * was processed, otherwise 0 if the link was freed since the packet - * processing lead to some inconsistency error (for instance a PONG - * received from the wrong sender ID). */ -int clusterProcessPacket(clusterLink *link) { - clusterMsg *hdr = (clusterMsg*) link->rcvbuf; - uint32_t totlen = ntohl(hdr->totlen); - uint16_t type = ntohs(hdr->type); - mstime_t now = mstime(); - - if (type < CLUSTERMSG_TYPE_COUNT) - server.cluster->stats_bus_messages_received[type]++; - serverLog(LL_DEBUG,"--- Processing packet of type %s, %lu bytes", - clusterGetMessageTypeString(type), (unsigned long) totlen); - - /* Perform sanity checks */ - if (totlen < 16) return 1; /* At least signature, version, totlen, count. */ - if (totlen > link->rcvbuf_len) return 1; - - if (ntohs(hdr->ver) != CLUSTER_PROTO_VER) { - /* Can't handle messages of different versions. 
*/ - return 1; - } - - if (type == server.cluster_drop_packet_filter) { - serverLog(LL_WARNING, "Dropping packet that matches debug drop filter"); - return 1; - } - - uint16_t flags = ntohs(hdr->flags); - uint16_t extensions = ntohs(hdr->extensions); - uint64_t senderCurrentEpoch = 0, senderConfigEpoch = 0; - uint32_t explen; /* expected length of this packet */ - clusterNode *sender; - - if (type == CLUSTERMSG_TYPE_PING || type == CLUSTERMSG_TYPE_PONG || - type == CLUSTERMSG_TYPE_MEET) - { - uint16_t count = ntohs(hdr->count); - - explen = sizeof(clusterMsg)-sizeof(union clusterMsgData); - explen += (sizeof(clusterMsgDataGossip)*count); - - /* If there is extension data, which doesn't have a fixed length, - * loop through them and validate the length of it now. */ - if (hdr->mflags[0] & CLUSTERMSG_FLAG0_EXT_DATA) { - clusterMsgPingExt *ext = getInitialPingExt(hdr, count); - while (extensions--) { - uint16_t extlen = getPingExtLength(ext); - if (extlen % 8 != 0) { - serverLog(LL_WARNING, "Received a %s packet without proper padding (%d bytes)", - clusterGetMessageTypeString(type), (int) extlen); - return 1; - } - if ((totlen - explen) < extlen) { - serverLog(LL_WARNING, "Received invalid %s packet with extension data that exceeds " - "total packet length (%lld)", clusterGetMessageTypeString(type), - (unsigned long long) totlen); - return 1; - } - explen += extlen; - ext = getNextPingExt(ext); - } - } - } else if (type == CLUSTERMSG_TYPE_FAIL) { - explen = sizeof(clusterMsg)-sizeof(union clusterMsgData); - explen += sizeof(clusterMsgDataFail); - } else if (type == CLUSTERMSG_TYPE_PUBLISH || type == CLUSTERMSG_TYPE_PUBLISHSHARD) { - explen = sizeof(clusterMsg)-sizeof(union clusterMsgData); - explen += sizeof(clusterMsgDataPublish) - - 8 + - ntohl(hdr->data.publish.msg.channel_len) + - ntohl(hdr->data.publish.msg.message_len); - } else if (type == CLUSTERMSG_TYPE_FAILOVER_AUTH_REQUEST || - type == CLUSTERMSG_TYPE_FAILOVER_AUTH_ACK || - type == CLUSTERMSG_TYPE_MFSTART) 
- { - explen = sizeof(clusterMsg)-sizeof(union clusterMsgData); - } else if (type == CLUSTERMSG_TYPE_UPDATE) { - explen = sizeof(clusterMsg)-sizeof(union clusterMsgData); - explen += sizeof(clusterMsgDataUpdate); - } else if (type == CLUSTERMSG_TYPE_MODULE) { - explen = sizeof(clusterMsg)-sizeof(union clusterMsgData); - explen += sizeof(clusterMsgModule) - - 3 + ntohl(hdr->data.module.msg.len); - } else { - /* We don't know this type of packet, so we assume it's well formed. */ - explen = totlen; - } - - if (totlen != explen) { - serverLog(LL_WARNING, "Received invalid %s packet of length %lld but expected length %lld", - clusterGetMessageTypeString(type), (unsigned long long) totlen, (unsigned long long) explen); - return 1; - } - - sender = getNodeFromLinkAndMsg(link, hdr); - - /* Update the last time we saw any data from this node. We - * use this in order to avoid detecting a timeout from a node that - * is just sending a lot of data in the cluster bus, for instance - * because of Pub/Sub. */ - if (sender) sender->data_received = now; - - if (sender && !nodeInHandshake(sender)) { - /* Update our currentEpoch if we see a newer epoch in the cluster. */ - senderCurrentEpoch = ntohu64(hdr->currentEpoch); - senderConfigEpoch = ntohu64(hdr->configEpoch); - if (senderCurrentEpoch > server.cluster->currentEpoch) - server.cluster->currentEpoch = senderCurrentEpoch; - /* Update the sender configEpoch if it is publishing a newer one. */ - if (senderConfigEpoch > sender->configEpoch) { - sender->configEpoch = senderConfigEpoch; - clusterDoBeforeSleep(CLUSTER_TODO_SAVE_CONFIG| - CLUSTER_TODO_FSYNC_CONFIG); - } - /* Update the replication offset info for this node. */ - sender->repl_offset = ntohu64(hdr->offset); - sender->repl_offset_time = now; - /* If we are a slave performing a manual failover and our master - * sent its offset while already paused, populate the MF state. 
*/ - if (server.cluster->mf_end && - nodeIsSlave(myself) && - myself->slaveof == sender && - hdr->mflags[0] & CLUSTERMSG_FLAG0_PAUSED && - server.cluster->mf_master_offset == -1) - { - server.cluster->mf_master_offset = sender->repl_offset; - clusterDoBeforeSleep(CLUSTER_TODO_HANDLE_MANUALFAILOVER); - serverLog(LL_NOTICE, - "Received replication offset for paused " - "master manual failover: %lld", - server.cluster->mf_master_offset); - } - } - - /* Initial processing of PING and MEET requests replying with a PONG. */ - if (type == CLUSTERMSG_TYPE_PING || type == CLUSTERMSG_TYPE_MEET) { - /* We use incoming MEET messages in order to set the address - * for 'myself', since only other cluster nodes will send us - * MEET messages on handshakes, when the cluster joins, or - * later if we changed address, and those nodes will use our - * official address to connect to us. So by obtaining this address - * from the socket is a simple way to discover / update our own - * address in the cluster without it being hardcoded in the config. - * - * However if we don't have an address at all, we update the address - * even with a normal PING packet. If it's wrong it will be fixed - * by MEET later. */ - if ((type == CLUSTERMSG_TYPE_MEET || myself->ip[0] == '\0') && - server.cluster_announce_ip == NULL) - { - char ip[NET_IP_STR_LEN]; - - if (connAddrSockName(link->conn,ip,sizeof(ip),NULL) != -1 && - strcmp(ip,myself->ip)) - { - memcpy(myself->ip,ip,NET_IP_STR_LEN); - serverLog(LL_NOTICE,"IP address for this node updated to %s", - myself->ip); - clusterDoBeforeSleep(CLUSTER_TODO_SAVE_CONFIG); - } - } - - /* Add this node if it is new for us and the msg type is MEET. - * In this stage we don't try to add the node with the right - * flags, slaveof pointer, and so forth, as this details will be - * resolved when we'll receive PONGs from the node. 
*/ - if (!sender && type == CLUSTERMSG_TYPE_MEET) { - clusterNode *node; - - node = createClusterNode(NULL,CLUSTER_NODE_HANDSHAKE); - serverAssert(nodeIp2String(node->ip,link,hdr->myip) == C_OK); - getClientPortFromClusterMsg(hdr, &node->tls_port, &node->tcp_port); - node->cport = ntohs(hdr->cport); - clusterAddNode(node); - clusterDoBeforeSleep(CLUSTER_TODO_SAVE_CONFIG); - } - - /* If this is a MEET packet from an unknown node, we still process - * the gossip section here since we have to trust the sender because - * of the message type. */ - if (!sender && type == CLUSTERMSG_TYPE_MEET) - clusterProcessGossipSection(hdr,link); - - /* Anyway reply with a PONG */ - clusterSendPing(link,CLUSTERMSG_TYPE_PONG); - } - - /* PING, PONG, MEET: process config information. */ - if (type == CLUSTERMSG_TYPE_PING || type == CLUSTERMSG_TYPE_PONG || - type == CLUSTERMSG_TYPE_MEET) - { - serverLog(LL_DEBUG,"%s packet received: %.40s", - clusterGetMessageTypeString(type), - link->node ? link->node->name : "NULL"); - if (!link->inbound) { - if (nodeInHandshake(link->node)) { - /* If we already have this node, try to change the - * IP/port of the node with the new one. */ - if (sender) { - serverLog(LL_VERBOSE, - "Handshake: we already know node %.40s (%s), " - "updating the address if needed.", sender->name, sender->human_nodename); - if (nodeUpdateAddressIfNeeded(sender,link,hdr)) - { - clusterDoBeforeSleep(CLUSTER_TODO_SAVE_CONFIG| - CLUSTER_TODO_UPDATE_STATE); - } - /* Free this node as we already have it. This will - * cause the link to be freed as well. */ - clusterDelNode(link->node); - return 0; - } - - /* First thing to do is replacing the random name with the - * right node name if this was a handshake stage. 
*/ - clusterRenameNode(link->node, hdr->sender); - serverLog(LL_DEBUG,"Handshake with node %.40s completed.", - link->node->name); - link->node->flags &= ~CLUSTER_NODE_HANDSHAKE; - link->node->flags |= flags&(CLUSTER_NODE_MASTER|CLUSTER_NODE_SLAVE); - clusterDoBeforeSleep(CLUSTER_TODO_SAVE_CONFIG); - } else if (memcmp(link->node->name,hdr->sender, - CLUSTER_NAMELEN) != 0) - { - /* If the reply has a non matching node ID we - * disconnect this node and set it as not having an associated - * address. */ - serverLog(LL_DEBUG,"PONG contains mismatching sender ID. About node %.40s added %d ms ago, having flags %d", - link->node->name, - (int)(now-(link->node->ctime)), - link->node->flags); - link->node->flags |= CLUSTER_NODE_NOADDR; - link->node->ip[0] = '\0'; - link->node->tcp_port = 0; - link->node->tls_port = 0; - link->node->cport = 0; - freeClusterLink(link); - clusterDoBeforeSleep(CLUSTER_TODO_SAVE_CONFIG); - return 0; - } - } - - /* Copy the CLUSTER_NODE_NOFAILOVER flag from what the sender - * announced. This is a dynamic flag that we receive from the - * sender, and the latest status must be trusted. We need it to - * be propagated because the slave ranking used to understand the - * delay of each slave in the voting process, needs to know - * what are the instances really competing. */ - if (sender) { - int nofailover = flags & CLUSTER_NODE_NOFAILOVER; - sender->flags &= ~CLUSTER_NODE_NOFAILOVER; - sender->flags |= nofailover; - } - - /* Update the node address if it changed. 
*/ - if (sender && type == CLUSTERMSG_TYPE_PING && - !nodeInHandshake(sender) && - nodeUpdateAddressIfNeeded(sender,link,hdr)) - { - clusterDoBeforeSleep(CLUSTER_TODO_SAVE_CONFIG| - CLUSTER_TODO_UPDATE_STATE); - } - - /* Update our info about the node */ - if (!link->inbound && type == CLUSTERMSG_TYPE_PONG) { - link->node->pong_received = now; - link->node->ping_sent = 0; - - /* The PFAIL condition can be reversed without external - * help if it is momentary (that is, if it does not - * turn into a FAIL state). - * - * The FAIL condition is also reversible under specific - * conditions detected by clearNodeFailureIfNeeded(). */ - if (nodeTimedOut(link->node)) { - link->node->flags &= ~CLUSTER_NODE_PFAIL; - clusterDoBeforeSleep(CLUSTER_TODO_SAVE_CONFIG| - CLUSTER_TODO_UPDATE_STATE); - } else if (nodeFailed(link->node)) { - clearNodeFailureIfNeeded(link->node); - } - } - - /* Check for role switch: slave -> master or master -> slave. */ - if (sender) { - if (!memcmp(hdr->slaveof,CLUSTER_NODE_NULL_NAME, - sizeof(hdr->slaveof))) - { - /* Node is a master. */ - clusterSetNodeAsMaster(sender); - } else { - /* Node is a slave. */ - clusterNode *master = clusterLookupNode(hdr->slaveof, CLUSTER_NAMELEN); - - if (nodeIsMaster(sender)) { - /* Master turned into a slave! Reconfigure the node. */ - clusterDelNodeSlots(sender); - sender->flags &= ~(CLUSTER_NODE_MASTER| - CLUSTER_NODE_MIGRATE_TO); - sender->flags |= CLUSTER_NODE_SLAVE; - - /* Update config and state. */ - clusterDoBeforeSleep(CLUSTER_TODO_SAVE_CONFIG| - CLUSTER_TODO_UPDATE_STATE); - } - - /* Master node changed for this slave? */ - if (master && sender->slaveof != master) { - if (sender->slaveof) - clusterNodeRemoveSlave(sender->slaveof,sender); - clusterNodeAddSlave(master,sender); - sender->slaveof = master; - - /* Update config. */ - clusterDoBeforeSleep(CLUSTER_TODO_SAVE_CONFIG); - } - } - } - - /* Update our info about served slots. 
- * - * Note: this MUST happen after we update the master/slave state - * so that CLUSTER_NODE_MASTER flag will be set. */ - - /* Many checks are only needed if the set of served slots this - * instance claims is different compared to the set of slots we have - * for it. Check this ASAP to avoid other computational expansive - * checks later. */ - clusterNode *sender_master = NULL; /* Sender or its master if slave. */ - int dirty_slots = 0; /* Sender claimed slots don't match my view? */ - - if (sender) { - sender_master = nodeIsMaster(sender) ? sender : sender->slaveof; - if (sender_master) { - dirty_slots = memcmp(sender_master->slots, - hdr->myslots,sizeof(hdr->myslots)) != 0; - } - } - - /* 1) If the sender of the message is a master, and we detected that - * the set of slots it claims changed, scan the slots to see if we - * need to update our configuration. */ - if (sender && nodeIsMaster(sender) && dirty_slots) - clusterUpdateSlotsConfigWith(sender,senderConfigEpoch,hdr->myslots); - - /* 2) We also check for the reverse condition, that is, the sender - * claims to serve slots we know are served by a master with a - * greater configEpoch. If this happens we inform the sender. - * - * This is useful because sometimes after a partition heals, a - * reappearing master may be the last one to claim a given set of - * hash slots, but with a configuration that other instances know to - * be deprecated. Example: - * - * A and B are master and slave for slots 1,2,3. - * A is partitioned away, B gets promoted. - * B is partitioned away, and A returns available. - * - * Usually B would PING A publishing its set of served slots and its - * configEpoch, but because of the partition B can't inform A of the - * new configuration, so other nodes that have an updated table must - * do it. In this way A will stop to act as a master (or can try to - * failover if there are the conditions to win the election). 
*/ - if (sender && dirty_slots) { - int j; - - for (j = 0; j < CLUSTER_SLOTS; j++) { - if (bitmapTestBit(hdr->myslots,j)) { - if (server.cluster->slots[j] == sender || - isSlotUnclaimed(j)) continue; - if (server.cluster->slots[j]->configEpoch > - senderConfigEpoch) - { - serverLog(LL_VERBOSE, - "Node %.40s has old slots configuration, sending " - "an UPDATE message about %.40s", - sender->name, server.cluster->slots[j]->name); - clusterSendUpdate(sender->link, - server.cluster->slots[j]); - - /* TODO: instead of exiting the loop send every other - * UPDATE packet for other nodes that are the new owner - * of sender's slots. */ - break; - } - } - } - } - - /* If our config epoch collides with the sender's try to fix - * the problem. */ - if (sender && - nodeIsMaster(myself) && nodeIsMaster(sender) && - senderConfigEpoch == myself->configEpoch) - { - clusterHandleConfigEpochCollision(sender); - } - - /* Get info from the gossip section */ - if (sender) { - clusterProcessGossipSection(hdr,link); - clusterProcessPingExtensions(hdr,link); - } - } else if (type == CLUSTERMSG_TYPE_FAIL) { - clusterNode *failing; - - if (sender) { - failing = clusterLookupNode(hdr->data.fail.about.nodename, CLUSTER_NAMELEN); - if (failing && - !(failing->flags & (CLUSTER_NODE_FAIL|CLUSTER_NODE_MYSELF))) - { - serverLog(LL_NOTICE, - "FAIL message received from %.40s (%s) about %.40s (%s)", - hdr->sender, sender->human_nodename, hdr->data.fail.about.nodename, failing->human_nodename); - failing->flags |= CLUSTER_NODE_FAIL; - failing->fail_time = now; - failing->flags &= ~CLUSTER_NODE_PFAIL; - clusterDoBeforeSleep(CLUSTER_TODO_SAVE_CONFIG| - CLUSTER_TODO_UPDATE_STATE); - } - } else { - serverLog(LL_NOTICE, - "Ignoring FAIL message from unknown node %.40s about %.40s", - hdr->sender, hdr->data.fail.about.nodename); - } - } else if (type == CLUSTERMSG_TYPE_PUBLISH || type == CLUSTERMSG_TYPE_PUBLISHSHARD) { - if (!sender) return 1; /* We don't know that node. 
*/ - - robj *channel, *message; - uint32_t channel_len, message_len; - - /* Don't bother creating useless objects if there are no - * Pub/Sub subscribers. */ - if ((type == CLUSTERMSG_TYPE_PUBLISH - && serverPubsubSubscriptionCount() > 0) - || (type == CLUSTERMSG_TYPE_PUBLISHSHARD - && serverPubsubShardSubscriptionCount() > 0)) - { - channel_len = ntohl(hdr->data.publish.msg.channel_len); - message_len = ntohl(hdr->data.publish.msg.message_len); - channel = createStringObject( - (char*)hdr->data.publish.msg.bulk_data,channel_len); - message = createStringObject( - (char*)hdr->data.publish.msg.bulk_data+channel_len, - message_len); - pubsubPublishMessage(channel, message, type == CLUSTERMSG_TYPE_PUBLISHSHARD); - decrRefCount(channel); - decrRefCount(message); - } - } else if (type == CLUSTERMSG_TYPE_FAILOVER_AUTH_REQUEST) { - if (!sender) return 1; /* We don't know that node. */ - clusterSendFailoverAuthIfNeeded(sender,hdr); - } else if (type == CLUSTERMSG_TYPE_FAILOVER_AUTH_ACK) { - if (!sender) return 1; /* We don't know that node. */ - /* We consider this vote only if the sender is a master serving - * a non zero number of slots, and its currentEpoch is greater or - * equal to epoch where this node started the election. */ - if (nodeIsMaster(sender) && sender->numslots > 0 && - senderCurrentEpoch >= server.cluster->failover_auth_epoch) - { - server.cluster->failover_auth_count++; - /* Maybe we reached a quorum here, set a flag to make sure - * we check ASAP. */ - clusterDoBeforeSleep(CLUSTER_TODO_HANDLE_FAILOVER); - } - } else if (type == CLUSTERMSG_TYPE_MFSTART) { - /* This message is acceptable only if I'm a master and the sender - * is one of my slaves. */ - if (!sender || sender->slaveof != myself) return 1; - /* Manual failover requested from slaves. Initialize the state - * accordingly. 
*/ - resetManualFailover(); - server.cluster->mf_end = now + CLUSTER_MF_TIMEOUT; - server.cluster->mf_slave = sender; - pauseActions(PAUSE_DURING_FAILOVER, - now + (CLUSTER_MF_TIMEOUT * CLUSTER_MF_PAUSE_MULT), - PAUSE_ACTIONS_CLIENT_WRITE_SET); - serverLog(LL_NOTICE,"Manual failover requested by replica %.40s (%s).", - sender->name, sender->human_nodename); - /* We need to send a ping message to the replica, as it would carry - * `server.cluster->mf_master_offset`, which means the master paused clients - * at offset `server.cluster->mf_master_offset`, so that the replica would - * know that it is safe to set its `server.cluster->mf_can_start` to 1 so as - * to complete failover as quickly as possible. */ - clusterSendPing(link, CLUSTERMSG_TYPE_PING); - } else if (type == CLUSTERMSG_TYPE_UPDATE) { - clusterNode *n; /* The node the update is about. */ - uint64_t reportedConfigEpoch = - ntohu64(hdr->data.update.nodecfg.configEpoch); - - if (!sender) return 1; /* We don't know the sender. */ - n = clusterLookupNode(hdr->data.update.nodecfg.nodename, CLUSTER_NAMELEN); - if (!n) return 1; /* We don't know the reported node. */ - if (n->configEpoch >= reportedConfigEpoch) return 1; /* Nothing new. */ - - /* If in our current config the node is a slave, set it as a master. */ - if (nodeIsSlave(n)) clusterSetNodeAsMaster(n); - - /* Update the node's configEpoch. */ - n->configEpoch = reportedConfigEpoch; - clusterDoBeforeSleep(CLUSTER_TODO_SAVE_CONFIG| - CLUSTER_TODO_FSYNC_CONFIG); - - /* Check the bitmap of served slots and update our - * config accordingly. */ - clusterUpdateSlotsConfigWith(n,reportedConfigEpoch, - hdr->data.update.nodecfg.slots); - } else if (type == CLUSTERMSG_TYPE_MODULE) { - if (!sender) return 1; /* Protect the module from unknown nodes. */ - /* We need to route this message back to the right module subscribed - * for the right message type. 
*/ - uint64_t module_id = hdr->data.module.msg.module_id; /* Endian-safe ID */ - uint32_t len = ntohl(hdr->data.module.msg.len); - uint8_t type = hdr->data.module.msg.type; - unsigned char *payload = hdr->data.module.msg.bulk_data; - moduleCallClusterReceivers(sender->name,module_id,type,payload,len); - } else { - serverLog(LL_WARNING,"Received unknown packet type: %d", type); - } - return 1; -} - -/* This function is called when we detect the link with this node is lost. - We set the node as no longer connected. The Cluster Cron will detect - this connection and will try to get it connected again. - - Instead if the node is a temporary node used to accept a query, we - completely free the node on error. */ -void handleLinkIOError(clusterLink *link) { - freeClusterLink(link); -} - -/* Send the messages queued for the link. */ -void clusterWriteHandler(connection *conn) { - clusterLink *link = connGetPrivateData(conn); - ssize_t nwritten; - size_t totwritten = 0; - - while (totwritten < NET_MAX_WRITES_PER_EVENT && listLength(link->send_msg_queue) > 0) { - listNode *head = listFirst(link->send_msg_queue); - clusterMsgSendBlock *msgblock = (clusterMsgSendBlock*)head->value; - clusterMsg *msg = &msgblock->msg; - size_t msg_offset = link->head_msg_send_offset; - size_t msg_len = ntohl(msg->totlen); - - nwritten = connWrite(conn, (char*)msg + msg_offset, msg_len - msg_offset); - if (nwritten <= 0) { - serverLog(LL_DEBUG,"I/O error writing to node link: %s", - (nwritten == -1) ? 
connGetLastError(conn) : "short write"); - handleLinkIOError(link); - return; - } - if (msg_offset + nwritten < msg_len) { - /* If full message wasn't written, record the offset - * and continue sending from this point next time */ - link->head_msg_send_offset += nwritten; - return; - } - serverAssert((msg_offset + nwritten) == msg_len); - link->head_msg_send_offset = 0; - - /* Delete the node and update our memory tracking */ - uint32_t blocklen = msgblock->totlen; - listDelNode(link->send_msg_queue, head); - server.stat_cluster_links_memory -= sizeof(listNode); - link->send_msg_queue_mem -= sizeof(listNode) + blocklen; - - totwritten += nwritten; - } - - if (listLength(link->send_msg_queue) == 0) - connSetWriteHandler(link->conn, NULL); -} - -/* A connect handler that gets called when a connection to another node - * gets established. - */ -void clusterLinkConnectHandler(connection *conn) { - clusterLink *link = connGetPrivateData(conn); - clusterNode *node = link->node; - - /* Check if connection succeeded */ - if (connGetState(conn) != CONN_STATE_CONNECTED) { - serverLog(LL_VERBOSE, "Connection with Node %.40s at %s:%d failed: %s", - node->name, node->ip, node->cport, - connGetLastError(conn)); - freeClusterLink(link); - return; - } - - /* Register a read handler from now on */ - connSetReadHandler(conn, clusterReadHandler); - - /* Queue a PING in the new connection ASAP: this is crucial - * to avoid false positives in failure detection. - * - * If the node is flagged as MEET, we send a MEET message instead - * of a PING one, to force the receiver to add us in its node - * table. */ - mstime_t old_ping_sent = node->ping_sent; - clusterSendPing(link, node->flags & CLUSTER_NODE_MEET ? - CLUSTERMSG_TYPE_MEET : CLUSTERMSG_TYPE_PING); - if (old_ping_sent) { - /* If there was an active ping before the link was - * disconnected, we want to restore the ping time, otherwise - * replaced by the clusterSendPing() call. 
*/ - node->ping_sent = old_ping_sent; - } - /* We can clear the flag after the first packet is sent. - * If we'll never receive a PONG, we'll never send new packets - * to this node. Instead after the PONG is received and we - * are no longer in meet/handshake status, we want to send - * normal PING packets. */ - node->flags &= ~CLUSTER_NODE_MEET; - - serverLog(LL_DEBUG,"Connecting with Node %.40s at %s:%d", - node->name, node->ip, node->cport); -} - -/* Read data. Try to read the first field of the header first to check the - * full length of the packet. When a whole packet is in memory this function - * will call the function to process the packet. And so forth. */ -void clusterReadHandler(connection *conn) { - clusterMsg buf[1]; - ssize_t nread; - clusterMsg *hdr; - clusterLink *link = connGetPrivateData(conn); - unsigned int readlen, rcvbuflen; - - while(1) { /* Read as long as there is data to read. */ - rcvbuflen = link->rcvbuf_len; - if (rcvbuflen < 8) { - /* First, obtain the first 8 bytes to get the full message - * length. */ - readlen = 8 - rcvbuflen; - } else { - /* Finally read the full message. */ - hdr = (clusterMsg*) link->rcvbuf; - if (rcvbuflen == 8) { - /* Perform some sanity check on the message signature - * and length. */ - if (memcmp(hdr->sig,"RCmb",4) != 0 || - ntohl(hdr->totlen) < CLUSTERMSG_MIN_LEN) - { - char ip[NET_IP_STR_LEN]; - int port; - if (connAddrPeerName(conn, ip, sizeof(ip), &port) == -1) { - serverLog(LL_WARNING, - "Bad message length or signature received " - "on the Cluster bus."); - } else { - serverLog(LL_WARNING, - "Bad message length or signature received " - "on the Cluster bus from %s:%d", ip, port); - } - handleLinkIOError(link); - return; - } - } - readlen = ntohl(hdr->totlen) - rcvbuflen; - if (readlen > sizeof(buf)) readlen = sizeof(buf); - } - - nread = connRead(conn,buf,readlen); - if (nread == -1 && (connGetState(conn) == CONN_STATE_CONNECTED)) return; /* No more data ready. 
*/ - - if (nread <= 0) { - /* I/O error... */ - serverLog(LL_DEBUG,"I/O error reading from node link: %s", - (nread == 0) ? "connection closed" : connGetLastError(conn)); - handleLinkIOError(link); - return; - } else { - /* Read data and recast the pointer to the new buffer. */ - size_t unused = link->rcvbuf_alloc - link->rcvbuf_len; - if ((size_t)nread > unused) { - size_t required = link->rcvbuf_len + nread; - size_t prev_rcvbuf_alloc = link->rcvbuf_alloc; - /* If less than 1mb, grow to twice the needed size, if larger grow by 1mb. */ - link->rcvbuf_alloc = required < RCVBUF_MAX_PREALLOC ? required * 2: required + RCVBUF_MAX_PREALLOC; - link->rcvbuf = zrealloc(link->rcvbuf, link->rcvbuf_alloc); - server.stat_cluster_links_memory += link->rcvbuf_alloc - prev_rcvbuf_alloc; - } - memcpy(link->rcvbuf + link->rcvbuf_len, buf, nread); - link->rcvbuf_len += nread; - hdr = (clusterMsg*) link->rcvbuf; - rcvbuflen += nread; - } - - /* Total length obtained? Process this packet. */ - if (rcvbuflen >= 8 && rcvbuflen == ntohl(hdr->totlen)) { - if (clusterProcessPacket(link)) { - if (link->rcvbuf_alloc > RCVBUF_INIT_LEN) { - size_t prev_rcvbuf_alloc = link->rcvbuf_alloc; - zfree(link->rcvbuf); - link->rcvbuf = zmalloc(link->rcvbuf_alloc = RCVBUF_INIT_LEN); - server.stat_cluster_links_memory += link->rcvbuf_alloc - prev_rcvbuf_alloc; - } - link->rcvbuf_len = 0; - } else { - return; /* Link no longer valid. */ - } - } - } -} - -/* Put the message block into the link's send queue. - * - * It is guaranteed that this function will never have as a side effect - * the link to be invalidated, so it is safe to call this function - * from event handlers that will do stuff with the same link later. 
*/ -void clusterSendMessage(clusterLink *link, clusterMsgSendBlock *msgblock) { - if (!link) { - return; - } - if (listLength(link->send_msg_queue) == 0 && msgblock->msg.totlen != 0) - connSetWriteHandlerWithBarrier(link->conn, clusterWriteHandler, 1); - - listAddNodeTail(link->send_msg_queue, msgblock); - msgblock->refcount++; - - /* Update memory tracking */ - link->send_msg_queue_mem += sizeof(listNode) + msgblock->totlen; - server.stat_cluster_links_memory += sizeof(listNode); - - /* Populate sent messages stats. */ - uint16_t type = ntohs(msgblock->msg.type); - if (type < CLUSTERMSG_TYPE_COUNT) - server.cluster->stats_bus_messages_sent[type]++; -} - -/* Send a message to all the nodes that are part of the cluster having - * a connected link. - * - * It is guaranteed that this function will never have as a side effect - * some node->link to be invalidated, so it is safe to call this function - * from event handlers that will do stuff with node links later. */ -void clusterBroadcastMessage(clusterMsgSendBlock *msgblock) { - dictIterator *di; - dictEntry *de; - - di = dictGetSafeIterator(server.cluster->nodes); - while((de = dictNext(di)) != NULL) { - clusterNode *node = dictGetVal(de); - - if (node->flags & (CLUSTER_NODE_MYSELF|CLUSTER_NODE_HANDSHAKE)) - continue; - clusterSendMessage(node->link,msgblock); - } - dictReleaseIterator(di); -} - -/* Build the message header. hdr must point to a buffer at least - * sizeof(clusterMsg) in bytes. */ -static void clusterBuildMessageHdr(clusterMsg *hdr, int type, size_t msglen) { - uint64_t offset; - clusterNode *master; - - /* If this node is a master, we send its slots bitmap and configEpoch. - * If this node is a slave we send the master's information instead (the - * node is flagged as slave so the receiver knows that it is NOT really - * in charge for this slots. */ - master = (nodeIsSlave(myself) && myself->slaveof) ? 
- myself->slaveof : myself; - - hdr->ver = htons(CLUSTER_PROTO_VER); - hdr->sig[0] = 'R'; - hdr->sig[1] = 'C'; - hdr->sig[2] = 'm'; - hdr->sig[3] = 'b'; - hdr->type = htons(type); - memcpy(hdr->sender,myself->name,CLUSTER_NAMELEN); - - /* If cluster-announce-ip option is enabled, force the receivers of our - * packets to use the specified address for this node. Otherwise if the - * first byte is zero, they'll do auto discovery. */ - memset(hdr->myip,0,NET_IP_STR_LEN); - if (server.cluster_announce_ip) { - redis_strlcpy(hdr->myip,server.cluster_announce_ip,NET_IP_STR_LEN); - } - - /* Handle cluster-announce-[tls-|bus-]port. */ - int announced_tcp_port, announced_tls_port, announced_cport; - deriveAnnouncedPorts(&announced_tcp_port, &announced_tls_port, &announced_cport); - - memcpy(hdr->myslots,master->slots,sizeof(hdr->myslots)); - memset(hdr->slaveof,0,CLUSTER_NAMELEN); - if (myself->slaveof != NULL) - memcpy(hdr->slaveof,myself->slaveof->name, CLUSTER_NAMELEN); - if (server.tls_cluster) { - hdr->port = htons(announced_tls_port); - hdr->pport = htons(announced_tcp_port); - } else { - hdr->port = htons(announced_tcp_port); - hdr->pport = htons(announced_tls_port); - } - hdr->cport = htons(announced_cport); - hdr->flags = htons(myself->flags); - hdr->state = server.cluster->state; - - /* Set the currentEpoch and configEpochs. */ - hdr->currentEpoch = htonu64(server.cluster->currentEpoch); - hdr->configEpoch = htonu64(master->configEpoch); - - /* Set the replication offset. */ - if (nodeIsSlave(myself)) - offset = replicationGetSlaveOffset(); - else - offset = server.master_repl_offset; - hdr->offset = htonu64(offset); - - /* Set the message flags. */ - if (nodeIsMaster(myself) && server.cluster->mf_end) - hdr->mflags[0] |= CLUSTERMSG_FLAG0_PAUSED; - - hdr->totlen = htonl(msglen); -} - -/* Set the i-th entry of the gossip section in the message pointed by 'hdr' - * to the info of the specified node 'n'. 
*/ -void clusterSetGossipEntry(clusterMsg *hdr, int i, clusterNode *n) { - clusterMsgDataGossip *gossip; - gossip = &(hdr->data.ping.gossip[i]); - memcpy(gossip->nodename,n->name,CLUSTER_NAMELEN); - gossip->ping_sent = htonl(n->ping_sent/1000); - gossip->pong_received = htonl(n->pong_received/1000); - memcpy(gossip->ip,n->ip,sizeof(n->ip)); - if (server.tls_cluster) { - gossip->port = htons(n->tls_port); - gossip->pport = htons(n->tcp_port); - } else { - gossip->port = htons(n->tcp_port); - gossip->pport = htons(n->tls_port); - } - gossip->cport = htons(n->cport); - gossip->flags = htons(n->flags); - gossip->notused1 = 0; -} - -/* Send a PING or PONG packet to the specified node, making sure to add enough - * gossip information. */ -void clusterSendPing(clusterLink *link, int type) { - static unsigned long long cluster_pings_sent = 0; - cluster_pings_sent++; - int gossipcount = 0; /* Number of gossip sections added so far. */ - int wanted; /* Number of gossip sections we want to append if possible. */ - int estlen; /* Upper bound on estimated packet length */ - /* freshnodes is the max number of nodes we can hope to append at all: - * nodes available minus two (ourself and the node we are sending the - * message to). However practically there may be less valid nodes since - * nodes in handshake state, disconnected, are not considered. */ - int freshnodes = dictSize(server.cluster->nodes)-2; - - /* How many gossip sections we want to add? 1/10 of the number of nodes - * and anyway at least 3. Why 1/10? - * - * If we have N masters, with N/10 entries, and we consider that in - * node_timeout we exchange with each other node at least 4 packets - * (we ping in the worst case in node_timeout/2 time, and we also - * receive two pings from the host), we have a total of 8 packets - * in the node_timeout*2 failure reports validity time. 
So we have - * that, for a single PFAIL node, we can expect to receive the following - * number of failure reports (in the specified window of time): - * - * PROB * GOSSIP_ENTRIES_PER_PACKET * TOTAL_PACKETS: - * - * PROB = probability of being featured in a single gossip entry, - * which is 1 / NUM_OF_NODES. - * ENTRIES = 10. - * TOTAL_PACKETS = 2 * 4 * NUM_OF_MASTERS. - * - * If we assume we have just masters (so num of nodes and num of masters - * is the same), with 1/10 we always get over the majority, and specifically - * 80% of the number of nodes, to account for many masters failing at the - * same time. - * - * Since we have non-voting slaves that lower the probability of an entry - * to feature our node, we set the number of entries per packet as - * 10% of the total nodes we have. */ - wanted = floor(dictSize(server.cluster->nodes)/10); - if (wanted < 3) wanted = 3; - if (wanted > freshnodes) wanted = freshnodes; - - /* Include all the nodes in PFAIL state, so that failure reports are - * faster to propagate to go from PFAIL to FAIL state. */ - int pfail_wanted = server.cluster->stats_pfail_nodes; - - /* Compute the maximum estlen to allocate our buffer. We'll fix the estlen - * later according to the number of gossip sections we really were able - * to put inside the packet. */ - estlen = sizeof(clusterMsg) - sizeof(union clusterMsgData); - estlen += (sizeof(clusterMsgDataGossip)*(wanted + pfail_wanted)); - estlen += writePingExt(NULL, 0); - /* Note: clusterBuildMessageHdr() expects the buffer to be always at least - * sizeof(clusterMsg) or more. 
*/ - if (estlen < (int)sizeof(clusterMsg)) estlen = sizeof(clusterMsg); - clusterMsgSendBlock *msgblock = createClusterMsgSendBlock(type, estlen); - clusterMsg *hdr = &msgblock->msg; - - if (!link->inbound && type == CLUSTERMSG_TYPE_PING) - link->node->ping_sent = mstime(); - - /* Populate the gossip fields */ - int maxiterations = wanted*3; - while(freshnodes > 0 && gossipcount < wanted && maxiterations--) { - dictEntry *de = dictGetRandomKey(server.cluster->nodes); - clusterNode *this = dictGetVal(de); - - /* Don't include this node: the whole packet header is about us - * already, so we just gossip about other nodes. */ - if (this == myself) continue; - - /* PFAIL nodes will be added later. */ - if (this->flags & CLUSTER_NODE_PFAIL) continue; - - /* In the gossip section don't include: - * 1) Nodes in HANDSHAKE state. - * 3) Nodes with the NOADDR flag set. - * 4) Disconnected nodes if they don't have configured slots. - */ - if (this->flags & (CLUSTER_NODE_HANDSHAKE|CLUSTER_NODE_NOADDR) || - (this->link == NULL && this->numslots == 0)) - { - freshnodes--; /* Technically not correct, but saves CPU. */ - continue; - } - - /* Do not add a node we already have. */ - if (this->last_in_ping_gossip == cluster_pings_sent) continue; - - /* Add it */ - clusterSetGossipEntry(hdr,gossipcount,this); - this->last_in_ping_gossip = cluster_pings_sent; - freshnodes--; - gossipcount++; - } - - /* If there are PFAIL nodes, add them at the end. 
*/ - if (pfail_wanted) { - dictIterator *di; - dictEntry *de; - - di = dictGetSafeIterator(server.cluster->nodes); - while((de = dictNext(di)) != NULL && pfail_wanted > 0) { - clusterNode *node = dictGetVal(de); - if (node->flags & CLUSTER_NODE_HANDSHAKE) continue; - if (node->flags & CLUSTER_NODE_NOADDR) continue; - if (!(node->flags & CLUSTER_NODE_PFAIL)) continue; - clusterSetGossipEntry(hdr,gossipcount,node); - gossipcount++; - /* We take the count of the slots we allocated, since the - * PFAIL stats may not match perfectly with the current number - * of PFAIL nodes. */ - pfail_wanted--; - } - dictReleaseIterator(di); - } - - /* Compute the actual total length and send! */ - uint32_t totlen = 0; - totlen += writePingExt(hdr, gossipcount); - totlen += sizeof(clusterMsg)-sizeof(union clusterMsgData); - totlen += (sizeof(clusterMsgDataGossip)*gossipcount); - serverAssert(gossipcount < USHRT_MAX); - hdr->count = htons(gossipcount); - hdr->totlen = htonl(totlen); - - clusterSendMessage(link,msgblock); - clusterMsgSendBlockDecrRefCount(msgblock); -} - -/* Send a PONG packet to every connected node that's not in handshake state - * and for which we have a valid link. - * - * In Redis Cluster pongs are not used just for failure detection, but also - * to carry important configuration information. So broadcasting a pong is - * useful when something changes in the configuration and we want to make - * the cluster aware ASAP (for instance after a slave promotion). - * - * The 'target' argument specifies the receiving instances using the - * defines below: - * - * CLUSTER_BROADCAST_ALL -> All known instances. - * CLUSTER_BROADCAST_LOCAL_SLAVES -> All slaves in my master-slaves ring. 
- */ -#define CLUSTER_BROADCAST_ALL 0 -#define CLUSTER_BROADCAST_LOCAL_SLAVES 1 -void clusterBroadcastPong(int target) { - dictIterator *di; - dictEntry *de; - - di = dictGetSafeIterator(server.cluster->nodes); - while((de = dictNext(di)) != NULL) { - clusterNode *node = dictGetVal(de); - - if (!node->link) continue; - if (node == myself || nodeInHandshake(node)) continue; - if (target == CLUSTER_BROADCAST_LOCAL_SLAVES) { - int local_slave = - nodeIsSlave(node) && node->slaveof && - (node->slaveof == myself || node->slaveof == myself->slaveof); - if (!local_slave) continue; - } - clusterSendPing(node->link,CLUSTERMSG_TYPE_PONG); - } - dictReleaseIterator(di); -} - -/* Create a PUBLISH message block. - * - * Sanitizer suppression: In clusterMsgDataPublish, sizeof(bulk_data) is 8. - * As all the struct is used as a buffer, when more than 8 bytes are copied into - * the 'bulk_data', sanitizer generates an out-of-bounds error which is a false - * positive in this context. */ -REDIS_NO_SANITIZE("bounds") -clusterMsgSendBlock *clusterCreatePublishMsgBlock(robj *channel, robj *message, uint16_t type) { - - uint32_t channel_len, message_len; - - channel = getDecodedObject(channel); - message = getDecodedObject(message); - channel_len = sdslen(channel->ptr); - message_len = sdslen(message->ptr); - - size_t msglen = sizeof(clusterMsg)-sizeof(union clusterMsgData); - msglen += sizeof(clusterMsgDataPublish) - 8 + channel_len + message_len; - clusterMsgSendBlock *msgblock = createClusterMsgSendBlock(type, msglen); - - clusterMsg *hdr = &msgblock->msg; - hdr->data.publish.msg.channel_len = htonl(channel_len); - hdr->data.publish.msg.message_len = htonl(message_len); - memcpy(hdr->data.publish.msg.bulk_data,channel->ptr,sdslen(channel->ptr)); - memcpy(hdr->data.publish.msg.bulk_data+sdslen(channel->ptr), - message->ptr,sdslen(message->ptr)); - - decrRefCount(channel); - decrRefCount(message); - - return msgblock; -} - -/* Send a FAIL message to all the nodes we are able to 
contact. - * The FAIL message is sent when we detect that a node is failing - * (CLUSTER_NODE_PFAIL) and we also receive a gossip confirmation of this: - * we switch the node state to CLUSTER_NODE_FAIL and ask all the other - * nodes to do the same ASAP. */ -void clusterSendFail(char *nodename) { - uint32_t msglen = sizeof(clusterMsg) - sizeof(union clusterMsgData) - + sizeof(clusterMsgDataFail); - clusterMsgSendBlock *msgblock = createClusterMsgSendBlock(CLUSTERMSG_TYPE_FAIL, msglen); - - clusterMsg *hdr = &msgblock->msg; - memcpy(hdr->data.fail.about.nodename,nodename,CLUSTER_NAMELEN); - - clusterBroadcastMessage(msgblock); - clusterMsgSendBlockDecrRefCount(msgblock); -} - -/* Send an UPDATE message to the specified link carrying the specified 'node' - * slots configuration. The node name, slots bitmap, and configEpoch info - * are included. */ -void clusterSendUpdate(clusterLink *link, clusterNode *node) { - if (link == NULL) return; - - uint32_t msglen = sizeof(clusterMsg) - sizeof(union clusterMsgData) - + sizeof(clusterMsgDataUpdate); - clusterMsgSendBlock *msgblock = createClusterMsgSendBlock(CLUSTERMSG_TYPE_UPDATE, msglen); - - clusterMsg *hdr = &msgblock->msg; - memcpy(hdr->data.update.nodecfg.nodename,node->name,CLUSTER_NAMELEN); - hdr->data.update.nodecfg.configEpoch = htonu64(node->configEpoch); - memcpy(hdr->data.update.nodecfg.slots,node->slots,sizeof(node->slots)); - for (unsigned int i = 0; i < sizeof(node->slots); i++) { - /* Don't advertise slots that the node stopped claiming */ - hdr->data.update.nodecfg.slots[i] = hdr->data.update.nodecfg.slots[i] & (~server.cluster->owner_not_claiming_slot[i]); - } - - clusterSendMessage(link,msgblock); - clusterMsgSendBlockDecrRefCount(msgblock); -} - -/* Send a MODULE message. - * - * If link is NULL, then the message is broadcasted to the whole cluster. 
*/ -void clusterSendModule(clusterLink *link, uint64_t module_id, uint8_t type, - const char *payload, uint32_t len) { - uint32_t msglen = sizeof(clusterMsg)-sizeof(union clusterMsgData); - msglen += sizeof(clusterMsgModule) - 3 + len; - clusterMsgSendBlock *msgblock = createClusterMsgSendBlock(CLUSTERMSG_TYPE_MODULE, msglen); - - clusterMsg *hdr = &msgblock->msg; - hdr->data.module.msg.module_id = module_id; /* Already endian adjusted. */ - hdr->data.module.msg.type = type; - hdr->data.module.msg.len = htonl(len); - memcpy(hdr->data.module.msg.bulk_data,payload,len); - - if (link) - clusterSendMessage(link,msgblock); - else - clusterBroadcastMessage(msgblock); - - clusterMsgSendBlockDecrRefCount(msgblock); -} - -/* This function gets a cluster node ID string as target, the same way the nodes - * addresses are represented in the modules side, resolves the node, and sends - * the message. If the target is NULL the message is broadcasted. - * - * The function returns C_OK if the target is valid, otherwise C_ERR is - * returned. */ -int clusterSendModuleMessageToTarget(const char *target, uint64_t module_id, uint8_t type, const char *payload, uint32_t len) { - clusterNode *node = NULL; - - if (target != NULL) { - node = clusterLookupNode(target, strlen(target)); - if (node == NULL || node->link == NULL) return C_ERR; - } - - clusterSendModule(target ? node->link : NULL, - module_id, type, payload, len); - return C_OK; -} - -/* ----------------------------------------------------------------------------- - * CLUSTER Pub/Sub support - * - * If `sharded` is 0: - * For now we do very little, just propagating [S]PUBLISH messages across the whole - * cluster. In the future we'll try to get smarter and avoiding propagating those - * messages to hosts without receives for a given channel. - * Otherwise: - * Publish this message across the slot (primary/replica). 
- * -------------------------------------------------------------------------- */ -void clusterPropagatePublish(robj *channel, robj *message, int sharded) { - clusterMsgSendBlock *msgblock; - - if (!sharded) { - msgblock = clusterCreatePublishMsgBlock(channel, message, CLUSTERMSG_TYPE_PUBLISH); - clusterBroadcastMessage(msgblock); - clusterMsgSendBlockDecrRefCount(msgblock); - return; - } - - listIter li; - listNode *ln; - list *nodes_for_slot = clusterGetNodesInMyShard(server.cluster->myself); - serverAssert(nodes_for_slot != NULL); - listRewind(nodes_for_slot, &li); - msgblock = clusterCreatePublishMsgBlock(channel, message, CLUSTERMSG_TYPE_PUBLISHSHARD); - while((ln = listNext(&li))) { - clusterNode *node = listNodeValue(ln); - if (node->flags & (CLUSTER_NODE_MYSELF|CLUSTER_NODE_HANDSHAKE)) - continue; - clusterSendMessage(node->link,msgblock); - } - clusterMsgSendBlockDecrRefCount(msgblock); -} - -/* ----------------------------------------------------------------------------- - * SLAVE node specific functions - * -------------------------------------------------------------------------- */ - -/* This function sends a FAILOVER_AUTH_REQUEST message to every node in order to - * see if there is the quorum for this slave instance to failover its failing - * master. - * - * Note that we send the failover request to everybody, master and slave nodes, - * but only the masters are supposed to reply to our query. */ -void clusterRequestFailoverAuth(void) { - uint32_t msglen = sizeof(clusterMsg)-sizeof(union clusterMsgData); - clusterMsgSendBlock *msgblock = createClusterMsgSendBlock(CLUSTERMSG_TYPE_FAILOVER_AUTH_REQUEST, msglen); - - clusterMsg *hdr = &msgblock->msg; - /* If this is a manual failover, set the CLUSTERMSG_FLAG0_FORCEACK bit - * in the header to communicate the nodes receiving the message that - * they should authorized the failover even if the master is working. 
*/ - if (server.cluster->mf_end) hdr->mflags[0] |= CLUSTERMSG_FLAG0_FORCEACK; - clusterBroadcastMessage(msgblock); - clusterMsgSendBlockDecrRefCount(msgblock); -} - -/* Send a FAILOVER_AUTH_ACK message to the specified node. */ -void clusterSendFailoverAuth(clusterNode *node) { - if (!node->link) return; - - uint32_t msglen = sizeof(clusterMsg)-sizeof(union clusterMsgData); - clusterMsgSendBlock *msgblock = createClusterMsgSendBlock(CLUSTERMSG_TYPE_FAILOVER_AUTH_ACK, msglen); - - clusterSendMessage(node->link,msgblock); - clusterMsgSendBlockDecrRefCount(msgblock); -} - -/* Send a MFSTART message to the specified node. */ -void clusterSendMFStart(clusterNode *node) { - if (!node->link) return; - - uint32_t msglen = sizeof(clusterMsg)-sizeof(union clusterMsgData); - clusterMsgSendBlock *msgblock = createClusterMsgSendBlock(CLUSTERMSG_TYPE_MFSTART, msglen); - - clusterSendMessage(node->link,msgblock); - clusterMsgSendBlockDecrRefCount(msgblock); -} - -/* Vote for the node asking for our vote if there are the conditions. */ -void clusterSendFailoverAuthIfNeeded(clusterNode *node, clusterMsg *request) { - clusterNode *master = node->slaveof; - uint64_t requestCurrentEpoch = ntohu64(request->currentEpoch); - uint64_t requestConfigEpoch = ntohu64(request->configEpoch); - unsigned char *claimed_slots = request->myslots; - int force_ack = request->mflags[0] & CLUSTERMSG_FLAG0_FORCEACK; - int j; - - /* IF we are not a master serving at least 1 slot, we don't have the - * right to vote, as the cluster size in Redis Cluster is the number - * of masters serving at least one slot, and quorum is the cluster - * size + 1 */ - if (nodeIsSlave(myself) || myself->numslots == 0) return; - - /* Request epoch must be >= our currentEpoch. - * Note that it is impossible for it to actually be greater since - * our currentEpoch was updated as a side effect of receiving this - * request, if the request epoch was greater. 
*/ - if (requestCurrentEpoch < server.cluster->currentEpoch) { - serverLog(LL_WARNING, - "Failover auth denied to %.40s (%s): reqEpoch (%llu) < curEpoch(%llu)", - node->name, node->human_nodename, - (unsigned long long) requestCurrentEpoch, - (unsigned long long) server.cluster->currentEpoch); - return; - } - - /* I already voted for this epoch? Return ASAP. */ - if (server.cluster->lastVoteEpoch == server.cluster->currentEpoch) { - serverLog(LL_WARNING, - "Failover auth denied to %.40s (%s): already voted for epoch %llu", - node->name, node->human_nodename, - (unsigned long long) server.cluster->currentEpoch); - return; - } - - /* Node must be a slave and its master down. - * The master can be non failing if the request is flagged - * with CLUSTERMSG_FLAG0_FORCEACK (manual failover). */ - if (nodeIsMaster(node) || master == NULL || - (!nodeFailed(master) && !force_ack)) - { - if (nodeIsMaster(node)) { - serverLog(LL_WARNING, - "Failover auth denied to %.40s (%s): it is a master node", - node->name, node->human_nodename); - } else if (master == NULL) { - serverLog(LL_WARNING, - "Failover auth denied to %.40s (%s): I don't know its master", - node->name, node->human_nodename); - } else if (!nodeFailed(master)) { - serverLog(LL_WARNING, - "Failover auth denied to %.40s (%s): its master is up", - node->name, node->human_nodename); - } - return; - } - - /* We did not voted for a slave about this master for two - * times the node timeout. This is not strictly needed for correctness - * of the algorithm but makes the base case more linear. 
*/ - if (mstime() - node->slaveof->voted_time < server.cluster_node_timeout * 2) - { - serverLog(LL_WARNING, - "Failover auth denied to %.40s %s: " - "can't vote about this master before %lld milliseconds", - node->name, node->human_nodename, - (long long) ((server.cluster_node_timeout*2)- - (mstime() - node->slaveof->voted_time))); - return; - } - - /* The slave requesting the vote must have a configEpoch for the claimed - * slots that is >= the one of the masters currently serving the same - * slots in the current configuration. */ - for (j = 0; j < CLUSTER_SLOTS; j++) { - if (bitmapTestBit(claimed_slots, j) == 0) continue; - if (isSlotUnclaimed(j) || - server.cluster->slots[j]->configEpoch <= requestConfigEpoch) - { - continue; - } - /* If we reached this point we found a slot that in our current slots - * is served by a master with a greater configEpoch than the one claimed - * by the slave requesting our vote. Refuse to vote for this slave. */ - serverLog(LL_WARNING, - "Failover auth denied to %.40s (%s): " - "slot %d epoch (%llu) > reqEpoch (%llu)", - node->name, node->human_nodename, j, - (unsigned long long) server.cluster->slots[j]->configEpoch, - (unsigned long long) requestConfigEpoch); - return; - } - - /* We can vote for this slave. */ - server.cluster->lastVoteEpoch = server.cluster->currentEpoch; - node->slaveof->voted_time = mstime(); - clusterDoBeforeSleep(CLUSTER_TODO_SAVE_CONFIG|CLUSTER_TODO_FSYNC_CONFIG); - clusterSendFailoverAuth(node); - serverLog(LL_NOTICE, "Failover auth granted to %.40s (%s) for epoch %llu", - node->name, node->human_nodename, (unsigned long long) server.cluster->currentEpoch); -} - -/* This function returns the "rank" of this instance, a slave, in the context - * of its master-slaves ring. The rank of the slave is given by the number of - * other slaves for the same master that have a better replication offset - * compared to the local one (better means, greater, so they claim more data). 
- * - * A slave with rank 0 is the one with the greatest (most up to date) - * replication offset, and so forth. Note that because how the rank is computed - * multiple slaves may have the same rank, in case they have the same offset. - * - * The slave rank is used to add a delay to start an election in order to - * get voted and replace a failing master. Slaves with better replication - * offsets are more likely to win. */ -int clusterGetSlaveRank(void) { - long long myoffset; - int j, rank = 0; - clusterNode *master; - - serverAssert(nodeIsSlave(myself)); - master = myself->slaveof; - if (master == NULL) return 0; /* Never called by slaves without master. */ - - myoffset = replicationGetSlaveOffset(); - for (j = 0; j < master->numslaves; j++) - if (master->slaves[j] != myself && - !nodeCantFailover(master->slaves[j]) && - master->slaves[j]->repl_offset > myoffset) rank++; - return rank; -} - -/* This function is called by clusterHandleSlaveFailover() in order to - * let the slave log why it is not able to failover. Sometimes there are - * not the conditions, but since the failover function is called again and - * again, we can't log the same things continuously. - * - * This function works by logging only if a given set of conditions are - * true: - * - * 1) The reason for which the failover can't be initiated changed. - * The reasons also include a NONE reason we reset the state to - * when the slave finds that its master is fine (no FAIL flag). - * 2) Also, the log is emitted again if the master is still down and - * the reason for not failing over is still the same, but more than - * CLUSTER_CANT_FAILOVER_RELOG_PERIOD seconds elapsed. - * 3) Finally, the function only logs if the slave is down for more than - * five seconds + NODE_TIMEOUT. This way nothing is logged when a - * failover starts in a reasonable time. - * - * The function is called with the reason why the slave can't failover - * which is one of the integer macros CLUSTER_CANT_FAILOVER_*. 
- * - * The function is guaranteed to be called only if 'myself' is a slave. */ -void clusterLogCantFailover(int reason) { - char *msg; - static time_t lastlog_time = 0; - mstime_t nolog_fail_time = server.cluster_node_timeout + 5000; - - /* Don't log if we have the same reason for some time. */ - if (reason == server.cluster->cant_failover_reason && - time(NULL)-lastlog_time < CLUSTER_CANT_FAILOVER_RELOG_PERIOD) - return; - - server.cluster->cant_failover_reason = reason; - - /* We also don't emit any log if the master failed no long ago, the - * goal of this function is to log slaves in a stalled condition for - * a long time. */ - if (myself->slaveof && - nodeFailed(myself->slaveof) && - (mstime() - myself->slaveof->fail_time) < nolog_fail_time) return; - - switch(reason) { - case CLUSTER_CANT_FAILOVER_DATA_AGE: - msg = "Disconnected from master for longer than allowed. " - "Please check the 'cluster-replica-validity-factor' configuration " - "option."; - break; - case CLUSTER_CANT_FAILOVER_WAITING_DELAY: - msg = "Waiting the delay before I can start a new failover."; - break; - case CLUSTER_CANT_FAILOVER_EXPIRED: - msg = "Failover attempt expired."; - break; - case CLUSTER_CANT_FAILOVER_WAITING_VOTES: - msg = "Waiting for votes, but majority still not reached."; - break; - default: - msg = "Unknown reason code."; - break; - } - lastlog_time = time(NULL); - serverLog(LL_NOTICE,"Currently unable to failover: %s", msg); - - int cur_vote = server.cluster->failover_auth_count; - int cur_quorum = (server.cluster->size / 2) + 1; - /* Emits a log when an election is in progress and waiting for votes or when the failover attempt expired. */ - if (reason == CLUSTER_CANT_FAILOVER_WAITING_VOTES || reason == CLUSTER_CANT_FAILOVER_EXPIRED) { - serverLog(LL_NOTICE, "Needed quorum: %d. 
Number of votes received so far: %d", cur_quorum, cur_vote); - } -} - -/* This function implements the final part of automatic and manual failovers, - * where the slave grabs its master's hash slots, and propagates the new - * configuration. - * - * Note that it's up to the caller to be sure that the node got a new - * configuration epoch already. */ -void clusterFailoverReplaceYourMaster(void) { - int j; - clusterNode *oldmaster = myself->slaveof; - - if (nodeIsMaster(myself) || oldmaster == NULL) return; - - /* 1) Turn this node into a master. */ - clusterSetNodeAsMaster(myself); - replicationUnsetMaster(); - - /* 2) Claim all the slots assigned to our master. */ - for (j = 0; j < CLUSTER_SLOTS; j++) { - if (clusterNodeGetSlotBit(oldmaster,j)) { - clusterDelSlot(j); - clusterAddSlot(myself,j); - } - } - - /* 3) Update state and save config. */ - clusterUpdateState(); - clusterSaveConfigOrDie(1); - - /* 4) Pong all the other nodes so that they can update the state - * accordingly and detect that we switched to master role. */ - clusterBroadcastPong(CLUSTER_BROADCAST_ALL); - - /* 5) If there was a manual failover in progress, clear the state. */ - resetManualFailover(); -} - -/* This function is called if we are a slave node and our master serving - * a non-zero amount of hash slots is in FAIL state. - * - * The goal of this function is: - * 1) To check if we are able to perform a failover, is our data updated? - * 2) Try to get elected by masters. - * 3) Perform the failover informing all the other nodes. 
- */ -void clusterHandleSlaveFailover(void) { - mstime_t data_age; - mstime_t auth_age = mstime() - server.cluster->failover_auth_time; - int needed_quorum = (server.cluster->size / 2) + 1; - int manual_failover = server.cluster->mf_end != 0 && - server.cluster->mf_can_start; - mstime_t auth_timeout, auth_retry_time; - - server.cluster->todo_before_sleep &= ~CLUSTER_TODO_HANDLE_FAILOVER; - - /* Compute the failover timeout (the max time we have to send votes - * and wait for replies), and the failover retry time (the time to wait - * before trying to get voted again). - * - * Timeout is MAX(NODE_TIMEOUT*2,2000) milliseconds. - * Retry is two times the Timeout. - */ - auth_timeout = server.cluster_node_timeout*2; - if (auth_timeout < 2000) auth_timeout = 2000; - auth_retry_time = auth_timeout*2; - - /* Pre conditions to run the function, that must be met both in case - * of an automatic or manual failover: - * 1) We are a slave. - * 2) Our master is flagged as FAIL, or this is a manual failover. - * 3) We don't have the no failover configuration set, and this is - * not a manual failover. - * 4) It is serving slots. */ - if (nodeIsMaster(myself) || - myself->slaveof == NULL || - (!nodeFailed(myself->slaveof) && !manual_failover) || - (server.cluster_slave_no_failover && !manual_failover) || - myself->slaveof->numslots == 0) - { - /* There are no reasons to failover, so we set the reason why we - * are returning without failing over to NONE. */ - server.cluster->cant_failover_reason = CLUSTER_CANT_FAILOVER_NONE; - return; - } - - /* Set data_age to the number of milliseconds we are disconnected from - * the master. 
*/ - if (server.repl_state == REPL_STATE_CONNECTED) { - data_age = (mstime_t)(server.unixtime - server.master->lastinteraction) - * 1000; - } else { - data_age = (mstime_t)(server.unixtime - server.repl_down_since) * 1000; - } - - /* Remove the node timeout from the data age as it is fine that we are - * disconnected from our master at least for the time it was down to be - * flagged as FAIL, that's the baseline. */ - if (data_age > server.cluster_node_timeout) - data_age -= server.cluster_node_timeout; - - /* Check if our data is recent enough according to the slave validity - * factor configured by the user. - * - * Check bypassed for manual failovers. */ - if (server.cluster_slave_validity_factor && - data_age > - (((mstime_t)server.repl_ping_slave_period * 1000) + - (server.cluster_node_timeout * server.cluster_slave_validity_factor))) - { - if (!manual_failover) { - clusterLogCantFailover(CLUSTER_CANT_FAILOVER_DATA_AGE); - return; - } - } - - /* If the previous failover attempt timeout and the retry time has - * elapsed, we can setup a new one. */ - if (auth_age > auth_retry_time) { - server.cluster->failover_auth_time = mstime() + - 500 + /* Fixed delay of 500 milliseconds, let FAIL msg propagate. */ - random() % 500; /* Random delay between 0 and 500 milliseconds. */ - server.cluster->failover_auth_count = 0; - server.cluster->failover_auth_sent = 0; - server.cluster->failover_auth_rank = clusterGetSlaveRank(); - /* We add another delay that is proportional to the slave rank. - * Specifically 1 second * rank. This way slaves that have a probably - * less updated replication offset, are penalized. */ - server.cluster->failover_auth_time += - server.cluster->failover_auth_rank * 1000; - /* However if this is a manual failover, no delay is needed. 
*/ - if (server.cluster->mf_end) { - server.cluster->failover_auth_time = mstime(); - server.cluster->failover_auth_rank = 0; - clusterDoBeforeSleep(CLUSTER_TODO_HANDLE_FAILOVER); - } - serverLog(LL_NOTICE, - "Start of election delayed for %lld milliseconds " - "(rank #%d, offset %lld).", - server.cluster->failover_auth_time - mstime(), - server.cluster->failover_auth_rank, - replicationGetSlaveOffset()); - /* Now that we have a scheduled election, broadcast our offset - * to all the other slaves so that they'll updated their offsets - * if our offset is better. */ - clusterBroadcastPong(CLUSTER_BROADCAST_LOCAL_SLAVES); - return; - } - - /* It is possible that we received more updated offsets from other - * slaves for the same master since we computed our election delay. - * Update the delay if our rank changed. - * - * Not performed if this is a manual failover. */ - if (server.cluster->failover_auth_sent == 0 && - server.cluster->mf_end == 0) - { - int newrank = clusterGetSlaveRank(); - if (newrank > server.cluster->failover_auth_rank) { - long long added_delay = - (newrank - server.cluster->failover_auth_rank) * 1000; - server.cluster->failover_auth_time += added_delay; - server.cluster->failover_auth_rank = newrank; - serverLog(LL_NOTICE, - "Replica rank updated to #%d, added %lld milliseconds of delay.", - newrank, added_delay); - } - } - - /* Return ASAP if we can't still start the election. */ - if (mstime() < server.cluster->failover_auth_time) { - clusterLogCantFailover(CLUSTER_CANT_FAILOVER_WAITING_DELAY); - return; - } - - /* Return ASAP if the election is too old to be valid. */ - if (auth_age > auth_timeout) { - clusterLogCantFailover(CLUSTER_CANT_FAILOVER_EXPIRED); - return; - } - - /* Ask for votes if needed. 
*/ - if (server.cluster->failover_auth_sent == 0) { - server.cluster->currentEpoch++; - server.cluster->failover_auth_epoch = server.cluster->currentEpoch; - serverLog(LL_NOTICE,"Starting a failover election for epoch %llu.", - (unsigned long long) server.cluster->currentEpoch); - clusterRequestFailoverAuth(); - server.cluster->failover_auth_sent = 1; - clusterDoBeforeSleep(CLUSTER_TODO_SAVE_CONFIG| - CLUSTER_TODO_UPDATE_STATE| - CLUSTER_TODO_FSYNC_CONFIG); - return; /* Wait for replies. */ - } - - /* Check if we reached the quorum. */ - if (server.cluster->failover_auth_count >= needed_quorum) { - /* We have the quorum, we can finally failover the master. */ - - serverLog(LL_NOTICE, - "Failover election won: I'm the new master."); - - /* Update my configEpoch to the epoch of the election. */ - if (myself->configEpoch < server.cluster->failover_auth_epoch) { - myself->configEpoch = server.cluster->failover_auth_epoch; - serverLog(LL_NOTICE, - "configEpoch set to %llu after successful failover", - (unsigned long long) myself->configEpoch); - } - - /* Take responsibility for the cluster slots. */ - clusterFailoverReplaceYourMaster(); - } else { - clusterLogCantFailover(CLUSTER_CANT_FAILOVER_WAITING_VOTES); - } -} - -/* ----------------------------------------------------------------------------- - * CLUSTER slave migration - * - * Slave migration is the process that allows a slave of a master that is - * already covered by at least another slave, to "migrate" to a master that - * is orphaned, that is, left with no working slaves. - * ------------------------------------------------------------------------- */ - -/* This function is responsible to decide if this replica should be migrated - * to a different (orphaned) master. It is called by the clusterCron() function - * only if: - * - * 1) We are a slave node. - * 2) It was detected that there is at least one orphaned master in - * the cluster. 
- * 3) We are a slave of one of the masters with the greatest number of - * slaves. - * - * This checks are performed by the caller since it requires to iterate - * the nodes anyway, so we spend time into clusterHandleSlaveMigration() - * if definitely needed. - * - * The function is called with a pre-computed max_slaves, that is the max - * number of working (not in FAIL state) slaves for a single master. - * - * Additional conditions for migration are examined inside the function. - */ -void clusterHandleSlaveMigration(int max_slaves) { - int j, okslaves = 0; - clusterNode *mymaster = myself->slaveof, *target = NULL, *candidate = NULL; - dictIterator *di; - dictEntry *de; - - /* Step 1: Don't migrate if the cluster state is not ok. */ - if (server.cluster->state != CLUSTER_OK) return; - - /* Step 2: Don't migrate if my master will not be left with at least - * 'migration-barrier' slaves after my migration. */ - if (mymaster == NULL) return; - for (j = 0; j < mymaster->numslaves; j++) - if (!nodeFailed(mymaster->slaves[j]) && - !nodeTimedOut(mymaster->slaves[j])) okslaves++; - if (okslaves <= server.cluster_migration_barrier) return; - - /* Step 3: Identify a candidate for migration, and check if among the - * masters with the greatest number of ok slaves, I'm the one with the - * smallest node ID (the "candidate slave"). - * - * Note: this means that eventually a replica migration will occur - * since slaves that are reachable again always have their FAIL flag - * cleared, so eventually there must be a candidate. - * There is a possible race condition causing multiple - * slaves to migrate at the same time, but this is unlikely to - * happen and relatively harmless when it does. 
*/ - candidate = myself; - di = dictGetSafeIterator(server.cluster->nodes); - while((de = dictNext(di)) != NULL) { - clusterNode *node = dictGetVal(de); - int okslaves = 0, is_orphaned = 1; - - /* We want to migrate only if this master is working, orphaned, and - * used to have slaves or if failed over a master that had slaves - * (MIGRATE_TO flag). This way we only migrate to instances that were - * supposed to have replicas. */ - if (nodeIsSlave(node) || nodeFailed(node)) is_orphaned = 0; - if (!(node->flags & CLUSTER_NODE_MIGRATE_TO)) is_orphaned = 0; - - /* Check number of working slaves. */ - if (nodeIsMaster(node)) okslaves = clusterCountNonFailingSlaves(node); - if (okslaves > 0) is_orphaned = 0; - - if (is_orphaned) { - if (!target && node->numslots > 0) target = node; - - /* Track the starting time of the orphaned condition for this - * master. */ - if (!node->orphaned_time) node->orphaned_time = mstime(); - } else { - node->orphaned_time = 0; - } - - /* Check if I'm the slave candidate for the migration: attached - * to a master with the maximum number of slaves and with the smallest - * node ID. */ - if (okslaves == max_slaves) { - for (j = 0; j < node->numslaves; j++) { - if (memcmp(node->slaves[j]->name, - candidate->name, - CLUSTER_NAMELEN) < 0) - { - candidate = node->slaves[j]; - } - } - } - } - dictReleaseIterator(di); - - /* Step 4: perform the migration if there is a target, and if I'm the - * candidate, but only if the master is continuously orphaned for a - * couple of seconds, so that during failovers, we give some time to - * the natural slaves of this instance to advertise their switch from - * the old master to the new one. 
*/ - if (target && candidate == myself && - (mstime()-target->orphaned_time) > CLUSTER_SLAVE_MIGRATION_DELAY && - !(server.cluster_module_flags & CLUSTER_MODULE_FLAG_NO_FAILOVER)) - { - serverLog(LL_NOTICE,"Migrating to orphaned master %.40s", - target->name); - clusterSetMaster(target); - } -} - -/* ----------------------------------------------------------------------------- - * CLUSTER manual failover - * - * This are the important steps performed by slaves during a manual failover: - * 1) User send CLUSTER FAILOVER command. The failover state is initialized - * setting mf_end to the millisecond unix time at which we'll abort the - * attempt. - * 2) Slave sends a MFSTART message to the master requesting to pause clients - * for two times the manual failover timeout CLUSTER_MF_TIMEOUT. - * When master is paused for manual failover, it also starts to flag - * packets with CLUSTERMSG_FLAG0_PAUSED. - * 3) Slave waits for master to send its replication offset flagged as PAUSED. - * 4) If slave received the offset from the master, and its offset matches, - * mf_can_start is set to 1, and clusterHandleSlaveFailover() will perform - * the failover as usually, with the difference that the vote request - * will be modified to force masters to vote for a slave that has a - * working master. - * - * From the point of view of the master things are simpler: when a - * PAUSE_CLIENTS packet is received the master sets mf_end as well and - * the sender in mf_slave. During the time limit for the manual failover - * the master will just send PINGs more often to this slave, flagged with - * the PAUSED flag, so that the slave will set mf_master_offset when receiving - * a packet from the master with this flag set. - * - * The goal of the manual failover is to perform a fast failover without - * data loss due to the asynchronous master-slave replication. - * -------------------------------------------------------------------------- */ - -/* Reset the manual failover state. 
This works for both masters and slaves - * as all the state about manual failover is cleared. - * - * The function can be used both to initialize the manual failover state at - * startup or to abort a manual failover in progress. */ -void resetManualFailover(void) { - if (server.cluster->mf_slave) { - /* We were a master failing over, so we paused clients and related actions. - * Regardless of the outcome we unpause now to allow traffic again. */ - unpauseActions(PAUSE_DURING_FAILOVER); - } - server.cluster->mf_end = 0; /* No manual failover in progress. */ - server.cluster->mf_can_start = 0; - server.cluster->mf_slave = NULL; - server.cluster->mf_master_offset = -1; -} - -/* If a manual failover timed out, abort it. */ -void manualFailoverCheckTimeout(void) { - if (server.cluster->mf_end && server.cluster->mf_end < mstime()) { - serverLog(LL_WARNING,"Manual failover timed out."); - resetManualFailover(); - } -} - -/* This function is called from the cluster cron function in order to go - * forward with a manual failover state machine. */ -void clusterHandleManualFailover(void) { - /* Return ASAP if no manual failover is in progress. */ - if (server.cluster->mf_end == 0) return; - - /* If mf_can_start is non-zero, the failover was already triggered so the - * next steps are performed by clusterHandleSlaveFailover(). */ - if (server.cluster->mf_can_start) return; - - if (server.cluster->mf_master_offset == -1) return; /* Wait for offset... */ - - if (server.cluster->mf_master_offset == replicationGetSlaveOffset()) { - /* Our replication offset matches the master replication offset - * announced after clients were paused. We can start the failover. 
*/ - server.cluster->mf_can_start = 1; - serverLog(LL_NOTICE, - "All master replication stream processed, " - "manual failover can start."); - clusterDoBeforeSleep(CLUSTER_TODO_HANDLE_FAILOVER); - return; - } - clusterDoBeforeSleep(CLUSTER_TODO_HANDLE_MANUALFAILOVER); -} - -/* ----------------------------------------------------------------------------- - * CLUSTER cron job - * -------------------------------------------------------------------------- */ - -/* Check if the node is disconnected and re-establish the connection. - * Also update a few stats while we are here, that can be used to make - * better decisions in other part of the code. */ -static int clusterNodeCronHandleReconnect(clusterNode *node, mstime_t handshake_timeout, mstime_t now) { - /* Not interested in reconnecting the link with myself or nodes - * for which we have no address. */ - if (node->flags & (CLUSTER_NODE_MYSELF|CLUSTER_NODE_NOADDR)) return 1; - - if (node->flags & CLUSTER_NODE_PFAIL) - server.cluster->stats_pfail_nodes++; - - /* A Node in HANDSHAKE state has a limited lifespan equal to the - * configured node timeout. */ - if (nodeInHandshake(node) && now - node->ctime > handshake_timeout) { - clusterDelNode(node); - return 1; - } - - if (node->link == NULL) { - clusterLink *link = createClusterLink(node); - link->conn = connCreate(connTypeOfCluster()); - connSetPrivateData(link->conn, link); - if (connConnect(link->conn, node->ip, node->cport, server.bind_source_addr, - clusterLinkConnectHandler) == C_ERR) { - /* We got a synchronous error from connect before - * clusterSendPing() had a chance to be called. - * If node->ping_sent is zero, failure detection can't work, - * so we claim we actually sent a ping now (that will - * be really sent as soon as the link is obtained). 
*/ - if (node->ping_sent == 0) node->ping_sent = mstime(); - serverLog(LL_DEBUG, "Unable to connect to " - "Cluster Node [%s]:%d -> %s", node->ip, - node->cport, server.neterr); - - freeClusterLink(link); - return 0; - } - } - return 0; -} - -static void freeClusterLinkOnBufferLimitReached(clusterLink *link) { - if (link == NULL || server.cluster_link_msg_queue_limit_bytes == 0) { - return; - } - - unsigned long long mem_link = link->send_msg_queue_mem; - if (mem_link > server.cluster_link_msg_queue_limit_bytes) { - serverLog(LL_WARNING, "Freeing cluster link(%s node %.40s, used memory: %llu) due to " - "exceeding send buffer memory limit.", link->inbound ? "from" : "to", - link->node ? link->node->name : "", mem_link); - freeClusterLink(link); - server.cluster->stat_cluster_links_buffer_limit_exceeded++; - } -} - -/* Free outbound link to a node if its send buffer size exceeded limit. */ -static void clusterNodeCronFreeLinkOnBufferLimitReached(clusterNode *node) { - freeClusterLinkOnBufferLimitReached(node->link); - freeClusterLinkOnBufferLimitReached(node->inbound_link); -} - -/* This is executed 10 times every second */ -void clusterCron(void) { - dictIterator *di; - dictEntry *de; - int update_state = 0; - int orphaned_masters; /* How many masters there are without ok slaves. */ - int max_slaves; /* Max number of ok slaves for a single master. */ - int this_slaves; /* Number of ok slaves for our master (if we are slave). */ - mstime_t min_pong = 0, now = mstime(); - clusterNode *min_pong_node = NULL; - static unsigned long long iteration = 0; - mstime_t handshake_timeout; - - iteration++; /* Number of times this function was called so far. */ - - clusterUpdateMyselfHostname(); - - /* The handshake timeout is the time after which a handshake node that was - * not turned into a normal node is removed from the nodes. Usually it is - * just the NODE_TIMEOUT value, but when NODE_TIMEOUT is too small we use - * the value of 1 second. 
*/ - handshake_timeout = server.cluster_node_timeout; - if (handshake_timeout < 1000) handshake_timeout = 1000; - - /* Clear so clusterNodeCronHandleReconnect can count the number of nodes in PFAIL. */ - server.cluster->stats_pfail_nodes = 0; - /* Run through some of the operations we want to do on each cluster node. */ - di = dictGetSafeIterator(server.cluster->nodes); - while((de = dictNext(di)) != NULL) { - clusterNode *node = dictGetVal(de); - /* We free the inbound or outboud link to the node if the link has an - * oversized message send queue and immediately try reconnecting. */ - clusterNodeCronFreeLinkOnBufferLimitReached(node); - /* The protocol is that function(s) below return non-zero if the node was - * terminated. - */ - if(clusterNodeCronHandleReconnect(node, handshake_timeout, now)) continue; - } - dictReleaseIterator(di); - - /* Ping some random node 1 time every 10 iterations, so that we usually ping - * one random node every second. */ - if (!(iteration % 10)) { - int j; - - /* Check a few random nodes and ping the one with the oldest - * pong_received time. */ - for (j = 0; j < 5; j++) { - de = dictGetRandomKey(server.cluster->nodes); - clusterNode *this = dictGetVal(de); - - /* Don't ping nodes disconnected or with a ping currently active. */ - if (this->link == NULL || this->ping_sent != 0) continue; - if (this->flags & (CLUSTER_NODE_MYSELF|CLUSTER_NODE_HANDSHAKE)) - continue; - if (min_pong_node == NULL || min_pong > this->pong_received) { - min_pong_node = this; - min_pong = this->pong_received; - } - } - if (min_pong_node) { - serverLog(LL_DEBUG,"Pinging node %.40s", min_pong_node->name); - clusterSendPing(min_pong_node->link, CLUSTERMSG_TYPE_PING); - } - } - - /* Iterate nodes to check if we need to flag something as failing. - * This loop is also responsible to: - * 1) Check if there are orphaned masters (masters without non failing - * slaves). - * 2) Count the max number of non failing slaves for a single master. 
- * 3) Count the number of slaves for our master, if we are a slave. */ - orphaned_masters = 0; - max_slaves = 0; - this_slaves = 0; - di = dictGetSafeIterator(server.cluster->nodes); - while((de = dictNext(di)) != NULL) { - clusterNode *node = dictGetVal(de); - now = mstime(); /* Use an updated time at every iteration. */ - - if (node->flags & - (CLUSTER_NODE_MYSELF|CLUSTER_NODE_NOADDR|CLUSTER_NODE_HANDSHAKE)) - continue; - - /* Orphaned master check, useful only if the current instance - * is a slave that may migrate to another master. */ - if (nodeIsSlave(myself) && nodeIsMaster(node) && !nodeFailed(node)) { - int okslaves = clusterCountNonFailingSlaves(node); - - /* A master is orphaned if it is serving a non-zero number of - * slots, have no working slaves, but used to have at least one - * slave, or failed over a master that used to have slaves. */ - if (okslaves == 0 && node->numslots > 0 && - node->flags & CLUSTER_NODE_MIGRATE_TO) - { - orphaned_masters++; - } - if (okslaves > max_slaves) max_slaves = okslaves; - if (myself->slaveof == node) - this_slaves = okslaves; - } - - /* If we are not receiving any data for more than half the cluster - * timeout, reconnect the link: maybe there is a connection - * issue even if the node is alive. */ - mstime_t ping_delay = now - node->ping_sent; - mstime_t data_delay = now - node->data_received; - if (node->link && /* is connected */ - now - node->link->ctime > - server.cluster_node_timeout && /* was not already reconnected */ - node->ping_sent && /* we already sent a ping */ - /* and we are waiting for the pong more than timeout/2 */ - ping_delay > server.cluster_node_timeout/2 && - /* and in such interval we are not seeing any traffic at all. */ - data_delay > server.cluster_node_timeout/2) - { - /* Disconnect the link, it will be reconnected automatically. 
*/ - freeClusterLink(node->link); - } - - /* If we have currently no active ping in this instance, and the - * received PONG is older than half the cluster timeout, send - * a new ping now, to ensure all the nodes are pinged without - * a too big delay. */ - mstime_t ping_interval = server.cluster_ping_interval ? - server.cluster_ping_interval : server.cluster_node_timeout/2; - if (node->link && - node->ping_sent == 0 && - (now - node->pong_received) > ping_interval) - { - clusterSendPing(node->link, CLUSTERMSG_TYPE_PING); - continue; - } - - /* If we are a master and one of the slaves requested a manual - * failover, ping it continuously. */ - if (server.cluster->mf_end && - nodeIsMaster(myself) && - server.cluster->mf_slave == node && - node->link) - { - clusterSendPing(node->link, CLUSTERMSG_TYPE_PING); - continue; - } - - /* Check only if we have an active ping for this instance. */ - if (node->ping_sent == 0) continue; - - /* Check if this node looks unreachable. - * Note that if we already received the PONG, then node->ping_sent - * is zero, so can't reach this code at all, so we don't risk of - * checking for a PONG delay if we didn't sent the PING. - * - * We also consider every incoming data as proof of liveness, since - * our cluster bus link is also used for data: under heavy data - * load pong delays are possible. */ - mstime_t node_delay = (ping_delay < data_delay) ? ping_delay : - data_delay; - - if (node_delay > server.cluster_node_timeout) { - /* Timeout reached. Set the node as possibly failing if it is - * not already in this state. */ - if (!(node->flags & (CLUSTER_NODE_PFAIL|CLUSTER_NODE_FAIL))) { - serverLog(LL_DEBUG,"*** NODE %.40s possibly failing", - node->name); - node->flags |= CLUSTER_NODE_PFAIL; - update_state = 1; - } - } - } - dictReleaseIterator(di); - - /* If we are a slave node but the replication is still turned off, - * enable it if we know the address of our master and it appears to - * be up. 
*/ - if (nodeIsSlave(myself) && - server.masterhost == NULL && - myself->slaveof && - nodeHasAddr(myself->slaveof)) - { - replicationSetMaster(myself->slaveof->ip, getNodeDefaultReplicationPort(myself->slaveof)); - } - - /* Abort a manual failover if the timeout is reached. */ - manualFailoverCheckTimeout(); - - if (nodeIsSlave(myself)) { - clusterHandleManualFailover(); - if (!(server.cluster_module_flags & CLUSTER_MODULE_FLAG_NO_FAILOVER)) - clusterHandleSlaveFailover(); - /* If there are orphaned slaves, and we are a slave among the masters - * with the max number of non-failing slaves, consider migrating to - * the orphaned masters. Note that it does not make sense to try - * a migration if there is no master with at least *two* working - * slaves. */ - if (orphaned_masters && max_slaves >= 2 && this_slaves == max_slaves && - server.cluster_allow_replica_migration) - clusterHandleSlaveMigration(max_slaves); - } - - if (update_state || server.cluster->state == CLUSTER_FAIL) - clusterUpdateState(); -} - -/* This function is called before the event handler returns to sleep for - * events. It is useful to perform operations that must be done ASAP in - * reaction to events fired but that are not safe to perform inside event - * handlers, or to perform potentially expansive tasks that we need to do - * a single time before replying to clients. */ -void clusterBeforeSleep(void) { - int flags = server.cluster->todo_before_sleep; - - /* Reset our flags (not strictly needed since every single function - * called for flags set should be able to clear its flag). 
*/ - server.cluster->todo_before_sleep = 0; - - if (flags & CLUSTER_TODO_HANDLE_MANUALFAILOVER) { - /* Handle manual failover as soon as possible so that won't have a 100ms - * as it was handled only in clusterCron */ - if(nodeIsSlave(myself)) { - clusterHandleManualFailover(); - if (!(server.cluster_module_flags & CLUSTER_MODULE_FLAG_NO_FAILOVER)) - clusterHandleSlaveFailover(); - } - } else if (flags & CLUSTER_TODO_HANDLE_FAILOVER) { - /* Handle failover, this is needed when it is likely that there is already - * the quorum from masters in order to react fast. */ - clusterHandleSlaveFailover(); - } - - /* Update the cluster state. */ - if (flags & CLUSTER_TODO_UPDATE_STATE) - clusterUpdateState(); - - /* Save the config, possibly using fsync. */ - if (flags & CLUSTER_TODO_SAVE_CONFIG) { - int fsync = flags & CLUSTER_TODO_FSYNC_CONFIG; - clusterSaveConfigOrDie(fsync); - } -} - -void clusterDoBeforeSleep(int flags) { - server.cluster->todo_before_sleep |= flags; -} - -/* ----------------------------------------------------------------------------- - * Slots management - * -------------------------------------------------------------------------- */ - -/* Test bit 'pos' in a generic bitmap. Return 1 if the bit is set, - * otherwise 0. */ -int bitmapTestBit(unsigned char *bitmap, int pos) { - off_t byte = pos/8; - int bit = pos&7; - return (bitmap[byte] & (1<nodes); - dictEntry *de; - int slaves = 0; - while((de = dictNext(di)) != NULL) { - clusterNode *node = dictGetVal(de); - - if (nodeIsSlave(node)) continue; - slaves += node->numslaves; - } - dictReleaseIterator(di); - return slaves != 0; -} - -/* Set the slot bit and return the old value. 
/* Set the slot bit and return the old value. */
int clusterNodeSetSlotBit(clusterNode *n, int slot) {
    int old = bitmapTestBit(n->slots,slot);
    if (!old) {
        bitmapSetBit(n->slots,slot);
        n->numslots++;
        /* When a master gets its first slot, even if it has no slaves,
         * it gets flagged with MIGRATE_TO, that is, the master is a valid
         * target for replicas migration, if and only if at least one of
         * the other masters has slaves right now.
         *
         * Normally masters are valid targets of replica migration if:
         * 1. The used to have slaves (but no longer have).
         * 2. They are slaves failing over a master that used to have slaves.
         *
         * However new masters with slots assigned are considered valid
         * migration targets if the rest of the cluster is not a slave-less.
         *
         * See https://github.com/redis/redis/issues/3043 for more info. */
        if (n->numslots == 1 && clusterMastersHaveSlaves())
            n->flags |= CLUSTER_NODE_MIGRATE_TO;
    }
    return old;
}

/* Clear the slot bit and return the old value. */
int clusterNodeClearSlotBit(clusterNode *n, int slot) {
    int old = bitmapTestBit(n->slots,slot);
    if (old) {
        bitmapClearBit(n->slots,slot);
        n->numslots--;
    }
    return old;
}

/* Return the slot bit from the cluster node structure. */
int clusterNodeGetSlotBit(clusterNode *n, int slot) {
    return bitmapTestBit(n->slots,slot);
}

/* Add the specified slot to the list of slots that node 'n' will
 * serve. Return C_OK if the operation ended with success.
 * If the slot is already assigned to another instance this is considered
 * an error and C_ERR is returned. */
int clusterAddSlot(clusterNode *n, int slot) {
    /* Slot already owned by some node: refuse to reassign it here. */
    if (server.cluster->slots[slot]) return C_ERR;
    clusterNodeSetSlotBit(n,slot);
    server.cluster->slots[slot] = n;
    return C_OK;
}

/* Delete the specified slot marking it as unassigned.
 * Returns C_OK if the slot was assigned, otherwise if the slot was
 * already unassigned C_ERR is returned. */
int clusterDelSlot(int slot) {
    clusterNode *n = server.cluster->slots[slot];

    if (!n) return C_ERR;

    /* Cleanup the channels in master/replica as part of slot deletion. */
    list *nodes_for_slot = clusterGetNodesInMyShard(n);
    serverAssert(nodes_for_slot != NULL);
    listNode *ln = listSearchKey(nodes_for_slot, myself);
    /* Only unsubscribe shard channels if this node belongs to the shard
     * that owned the slot. */
    if (ln != NULL) {
        removeChannelsInSlot(slot);
    }
    serverAssert(clusterNodeClearSlotBit(n,slot) == 1);
    server.cluster->slots[slot] = NULL;
    return C_OK;
}

/* Delete all the slots associated with the specified node.
 * The number of deleted slots is returned. */
int clusterDelNodeSlots(clusterNode *node) {
    int deleted = 0, j;

    for (j = 0; j < CLUSTER_SLOTS; j++) {
        if (clusterNodeGetSlotBit(node,j)) {
            clusterDelSlot(j);
            deleted++;
        }
    }
    return deleted;
}

/* Clear the migrating / importing state for all the slots.
 * This is useful at initialization and when turning a master into slave. */
void clusterCloseAllSlots(void) {
    memset(server.cluster->migrating_slots_to,0,
        sizeof(server.cluster->migrating_slots_to));
    memset(server.cluster->importing_slots_from,0,
        sizeof(server.cluster->importing_slots_from));
}
/* -----------------------------------------------------------------------------
 * Cluster state evaluation function
 * -------------------------------------------------------------------------- */

/* The following are defines that are only used in the evaluation function
 * and are based on heuristics. Actually the main point about the rejoin and
 * writable delay is that they should be a few orders of magnitude larger
 * than the network latency. */
#define CLUSTER_MAX_REJOIN_DELAY 5000
#define CLUSTER_MIN_REJOIN_DELAY 500
#define CLUSTER_WRITABLE_DELAY 2000

/* Recompute server.cluster->state (OK/FAIL) and server.cluster->size from
 * the current view of the cluster: slot coverage, reachable masters and
 * quorum. Called from clusterCron() and clusterBeforeSleep(). */
void clusterUpdateState(void) {
    int j, new_state;
    int reachable_masters = 0;
    /* Both statics persist across calls: among_minority_time remembers when
     * we last detected being in a minority partition; first_call_time is the
     * time of the first invocation (used for the writable delay below). */
    static mstime_t among_minority_time;
    static mstime_t first_call_time = 0;

    server.cluster->todo_before_sleep &= ~CLUSTER_TODO_UPDATE_STATE;

    /* If this is a master node, wait some time before turning the state
     * into OK, since it is not a good idea to rejoin the cluster as a writable
     * master, after a reboot, without giving the cluster a chance to
     * reconfigure this node. Note that the delay is calculated starting from
     * the first call to this function and not since the server start, in order
     * to not count the DB loading time. */
    if (first_call_time == 0) first_call_time = mstime();
    if (nodeIsMaster(myself) &&
        server.cluster->state == CLUSTER_FAIL &&
        mstime() - first_call_time < CLUSTER_WRITABLE_DELAY) return;

    /* Start assuming the state is OK. We'll turn it into FAIL if there
     * are the right conditions. */
    new_state = CLUSTER_OK;

    /* Check if all the slots are covered. */
    if (server.cluster_require_full_coverage) {
        for (j = 0; j < CLUSTER_SLOTS; j++) {
            if (server.cluster->slots[j] == NULL ||
                server.cluster->slots[j]->flags & (CLUSTER_NODE_FAIL))
            {
                new_state = CLUSTER_FAIL;
                break;
            }
        }
    }

    /* Compute the cluster size, that is the number of master nodes
     * serving at least a single slot.
     *
     * At the same time count the number of reachable masters having
     * at least one slot. */
    {
        dictIterator *di;
        dictEntry *de;

        server.cluster->size = 0;
        di = dictGetSafeIterator(server.cluster->nodes);
        while((de = dictNext(di)) != NULL) {
            clusterNode *node = dictGetVal(de);

            if (nodeIsMaster(node) && node->numslots) {
                server.cluster->size++;
                if ((node->flags & (CLUSTER_NODE_FAIL|CLUSTER_NODE_PFAIL)) == 0)
                    reachable_masters++;
            }
        }
        dictReleaseIterator(di);
    }

    /* If we are in a minority partition, change the cluster state
     * to FAIL. */
    {
        int needed_quorum = (server.cluster->size / 2) + 1;

        if (reachable_masters < needed_quorum) {
            new_state = CLUSTER_FAIL;
            among_minority_time = mstime();
        }
    }

    /* Log a state change */
    if (new_state != server.cluster->state) {
        mstime_t rejoin_delay = server.cluster_node_timeout;

        /* If the instance is a master and was partitioned away with the
         * minority, don't let it accept queries for some time after the
         * partition heals, to make sure there is enough time to receive
         * a configuration update. */
        if (rejoin_delay > CLUSTER_MAX_REJOIN_DELAY)
            rejoin_delay = CLUSTER_MAX_REJOIN_DELAY;
        if (rejoin_delay < CLUSTER_MIN_REJOIN_DELAY)
            rejoin_delay = CLUSTER_MIN_REJOIN_DELAY;

        if (new_state == CLUSTER_OK &&
            nodeIsMaster(myself) &&
            mstime() - among_minority_time < rejoin_delay)
        {
            return;
        }

        /* Change the state and log the event. */
        serverLog(new_state == CLUSTER_OK ? LL_NOTICE : LL_WARNING,
            "Cluster state changed: %s",
            new_state == CLUSTER_OK ? "ok" : "fail");
        server.cluster->state = new_state;
    }
}
/* This function is called after the node startup in order to verify that data
 * loaded from disk is in agreement with the cluster configuration:
 *
 * 1) If we find keys about hash slots we have no responsibility for, the
 *    following happens:
 *    A) If no other node is in charge according to the current cluster
 *       configuration, we add these slots to our node.
 *    B) If according to our config other nodes are already in charge for
 *       this slots, we set the slots as IMPORTING from our point of view
 *       in order to justify we have those slots, and in order to make
 *       redis-cli aware of the issue, so that it can try to fix it.
 * 2) If we find data in a DB different than DB0 we return C_ERR to
 *    signal the caller it should quit the server with an error message
 *    or take other actions.
 *
 * The function always returns C_OK even if it will try to correct
 * the error described in "1". However if data is found in DB different
 * from DB0, C_ERR is returned.
 *
 * The function also uses the logging facility in order to warn the user
 * about desynchronizations between the data we have in memory and the
 * cluster configuration. */
int verifyClusterConfigWithData(void) {
    int j;
    int update_config = 0;

    /* Return ASAP if a module disabled cluster redirections. In that case
     * every master can store keys about every possible hash slot. */
    if (server.cluster_module_flags & CLUSTER_MODULE_FLAG_NO_REDIRECTION)
        return C_OK;

    /* If this node is a slave, don't perform the check at all as we
     * completely depend on the replication stream. */
    if (nodeIsSlave(myself)) return C_OK;

    /* Make sure we only have keys in DB0. */
    for (j = 1; j < server.dbnum; j++) {
        if (dictSize(server.db[j].dict)) return C_ERR;
    }

    /* Check that all the slots we see populated memory have a corresponding
     * entry in the cluster table. Otherwise fix the table. */
    for (j = 0; j < CLUSTER_SLOTS; j++) {
        if (!countKeysInSlot(j)) continue; /* No keys in this slot. */
        /* Check if we are assigned to this slot or if we are importing it.
         * In both cases check the next slot as the configuration makes
         * sense. */
        if (server.cluster->slots[j] == myself ||
            server.cluster->importing_slots_from[j] != NULL) continue;

        /* If we are here data and cluster config don't agree, and we have
         * slot 'j' populated even if we are not importing it, nor we are
         * assigned to this slot. Fix this condition. */

        update_config++;
        /* Case A: slot is unassigned. Take responsibility for it. */
        if (server.cluster->slots[j] == NULL) {
            serverLog(LL_NOTICE, "I have keys for unassigned slot %d. "
                                    "Taking responsibility for it.",j);
            clusterAddSlot(myself,j);
        } else {
            serverLog(LL_NOTICE, "I have keys for slot %d, but the slot is "
                                    "assigned to another node. "
                                    "Setting it to importing state.",j);
            server.cluster->importing_slots_from[j] = server.cluster->slots[j];
        }
    }
    /* Persist any correction we made (with fsync) so the fixed view
     * survives a restart. */
    if (update_config) clusterSaveConfigOrDie(1);
    return C_OK;
}

/* -----------------------------------------------------------------------------
 * SLAVE nodes handling
 * -------------------------------------------------------------------------- */

/* Set the specified node 'n' as master for this node.
 * If this node is currently a master, it is turned into a slave. */
void clusterSetMaster(clusterNode *n) {
    serverAssert(n != myself);
    serverAssert(myself->numslots == 0);

    if (nodeIsMaster(myself)) {
        myself->flags &= ~(CLUSTER_NODE_MASTER|CLUSTER_NODE_MIGRATE_TO);
        myself->flags |= CLUSTER_NODE_SLAVE;
        clusterCloseAllSlots();
    } else {
        if (myself->slaveof)
            clusterNodeRemoveSlave(myself->slaveof,myself);
    }
    myself->slaveof = n;
    updateShardId(myself, n->shard_id);
    clusterNodeAddSlave(n,myself);
    replicationSetMaster(n->ip, getNodeDefaultReplicationPort(n));
    /* Any in-progress manual failover is no longer meaningful once we
     * switched master. */
    resetManualFailover();
}
/* -----------------------------------------------------------------------------
 * Nodes to string representation functions.
 * -------------------------------------------------------------------------- */

/* Maps a CLUSTER_NODE_* flag bit to its textual form (trailing comma
 * included; the final comma is stripped in representClusterNodeFlags()). */
struct redisNodeFlags {
    uint16_t flag;
    char *name;
};

static struct redisNodeFlags redisNodeFlagsTable[] = {
    {CLUSTER_NODE_MYSELF, "myself,"},
    {CLUSTER_NODE_MASTER, "master,"},
    {CLUSTER_NODE_SLAVE, "slave,"},
    {CLUSTER_NODE_PFAIL, "fail?,"},
    {CLUSTER_NODE_FAIL, "fail,"},
    {CLUSTER_NODE_HANDSHAKE, "handshake,"},
    {CLUSTER_NODE_NOADDR, "noaddr,"},
    {CLUSTER_NODE_NOFAILOVER, "nofailover,"}
};

/* Concatenate the comma separated list of node flags to the given SDS
 * string 'ci'. */
sds representClusterNodeFlags(sds ci, uint16_t flags) {
    size_t orig_len = sdslen(ci);
    int i, size = sizeof(redisNodeFlagsTable)/sizeof(struct redisNodeFlags);
    for (i = 0; i < size; i++) {
        struct redisNodeFlags *nodeflag = redisNodeFlagsTable + i;
        if (flags & nodeflag->flag) ci = sdscat(ci, nodeflag->name);
    }
    /* If no flag was added, add the "noflags" special flag. */
    if (sdslen(ci) == orig_len) ci = sdscat(ci,"noflags,");
    sdsIncrLen(ci,-1); /* Remove trailing comma. */
    return ci;
}

/* Concatenate the slot ownership information to the given SDS string 'ci'.
 * If the slot ownership is in a contiguous block, it's represented as start-end pair,
 * else each slot is added separately. */
sds representSlotInfo(sds ci, uint16_t *slot_info_pairs, int slot_info_pairs_count) {
    for (int i = 0; i< slot_info_pairs_count; i+=2) {
        unsigned long start = slot_info_pairs[i];
        unsigned long end = slot_info_pairs[i+1];
        /* NOTE(review): sdscatfmt's "%i" consumes an int while start/end are
         * unsigned long — values fit (slots < 16384) but verify sdscatfmt's
         * va_arg handling, or narrow the locals. */
        if (start == end) {
            ci = sdscatfmt(ci, " %i", start);
        } else {
            ci = sdscatfmt(ci, " %i-%i", start, end);
        }
    }
    return ci;
}
/* Generate a csv-alike representation of the specified cluster node.
 * See clusterGenNodesDescription() top comment for more information.
 *
 * The function returns the string representation as an SDS string. */
sds clusterGenNodeDescription(client *c, clusterNode *node, int tls_primary) {
    int j, start;
    sds ci;
    int port = getNodeClientPort(node, tls_primary);

    /* Node coordinates */
    ci = sdscatlen(sdsempty(),node->name,CLUSTER_NAMELEN);
    ci = sdscatfmt(ci," %s:%i@%i",
        node->ip,
        port,
        node->cport);
    if (sdslen(node->hostname) != 0) {
        ci = sdscatfmt(ci,",%s", node->hostname);
    }
    /* Emit the empty-hostname separator so the field count stays fixed. */
    if (sdslen(node->hostname) == 0) {
        ci = sdscatfmt(ci,",", 1);
    }
    /* Don't expose aux fields to any clients yet but do allow them
     * to be persisted to nodes.conf */
    if (c == NULL) {
        for (int i = af_count-1; i >=0; i--) {
            if ((tls_primary && i == af_tls_port) || (!tls_primary && i == af_tcp_port)) {
                continue;
            }
            if (auxFieldHandlers[i].isPresent(node)) {
                ci = sdscatprintf(ci, ",%s=", auxFieldHandlers[i].field);
                ci = auxFieldHandlers[i].getter(node, ci);
            }
        }
    }

    /* Flags */
    ci = sdscatlen(ci," ",1);
    ci = representClusterNodeFlags(ci, node->flags);

    /* Slave of... or just "-" */
    ci = sdscatlen(ci," ",1);
    if (node->slaveof)
        ci = sdscatlen(ci,node->slaveof->name,CLUSTER_NAMELEN);
    else
        ci = sdscatlen(ci,"-",1);

    /* A replica reports its master's config epoch. */
    unsigned long long nodeEpoch = node->configEpoch;
    if (nodeIsSlave(node) && node->slaveof) {
        nodeEpoch = node->slaveof->configEpoch;
    }
    /* Latency from the POV of this node, config epoch, link status */
    ci = sdscatfmt(ci," %I %I %U %s",
        (long long) node->ping_sent,
        (long long) node->pong_received,
        nodeEpoch,
        (node->link || node->flags & CLUSTER_NODE_MYSELF) ?
                    "connected" : "disconnected");

    /* Slots served by this instance. If we already have slots info,
     * append it directly, otherwise, generate slots only if it has. */
    if (node->slot_info_pairs) {
        ci = representSlotInfo(ci, node->slot_info_pairs, node->slot_info_pairs_count);
    } else if (node->numslots > 0) {
        start = -1;
        for (j = 0; j < CLUSTER_SLOTS; j++) {
            int bit;

            if ((bit = clusterNodeGetSlotBit(node,j)) != 0) {
                if (start == -1) start = j;
            }
            if (start != -1 && (!bit || j == CLUSTER_SLOTS-1)) {
                /* Close the current run; bump j so the last slot is
                 * included when the run extends to CLUSTER_SLOTS-1. */
                if (bit && j == CLUSTER_SLOTS-1) j++;

                if (start == j-1) {
                    ci = sdscatfmt(ci," %i",start);
                } else {
                    ci = sdscatfmt(ci," %i-%i",start,j-1);
                }
                start = -1;
            }
        }
    }

    /* Just for MYSELF node we also dump info about slots that
     * we are migrating to other instances or importing from other
     * instances. */
    if (node->flags & CLUSTER_NODE_MYSELF) {
        for (j = 0; j < CLUSTER_SLOTS; j++) {
            if (server.cluster->migrating_slots_to[j]) {
                ci = sdscatprintf(ci," [%d->-%.40s]",j,
                    server.cluster->migrating_slots_to[j]->name);
            } else if (server.cluster->importing_slots_from[j]) {
                ci = sdscatprintf(ci," [%d-<-%.40s]",j,
                    server.cluster->importing_slots_from[j]->name);
            }
        }
    }
    return ci;
}

/* Generate the slot topology for all nodes and store the string representation
 * in the slots_info struct on the node. This is used to improve the efficiency
 * of clusterGenNodesDescription() because it removes looping of the slot space
 * for generating the slot info for each node individually. */
void clusterGenNodesSlotsInfo(int filter) {
    clusterNode *n = NULL;
    int start = -1;

    /* Note: iterates one past the last slot so the final run is flushed. */
    for (int i = 0; i <= CLUSTER_SLOTS; i++) {
        /* Find start node and slot id. */
        if (n == NULL) {
            if (i == CLUSTER_SLOTS) break;
            n = server.cluster->slots[i];
            start = i;
            continue;
        }

        /* Generate slots info when occur different node with start
         * or end of slot. */
        if (i == CLUSTER_SLOTS || n != server.cluster->slots[i]) {
            if (!(n->flags & filter)) {
                if (!n->slot_info_pairs) {
                    n->slot_info_pairs = zmalloc(2 * n->numslots * sizeof(uint16_t));
                }
                serverAssert((n->slot_info_pairs_count + 1) < (2 * n->numslots));
                n->slot_info_pairs[n->slot_info_pairs_count++] = start;
                n->slot_info_pairs[n->slot_info_pairs_count++] = i-1;
            }
            if (i == CLUSTER_SLOTS) break;
            n = server.cluster->slots[i];
            start = i;
        }
    }
}

/* Release the cached slot-range pairs built by clusterGenNodesSlotsInfo(). */
void clusterFreeNodesSlotsInfo(clusterNode *n) {
    zfree(n->slot_info_pairs);
    n->slot_info_pairs = NULL;
    n->slot_info_pairs_count = 0;
}

/* Generate a csv-alike representation of the nodes we are aware of,
 * including the "myself" node, and return an SDS string containing the
 * representation (it is up to the caller to free it).
 *
 * All the nodes matching at least one of the node flags specified in
 * "filter" are excluded from the output, so using zero as a filter will
 * include all the known nodes in the representation, including nodes in
 * the HANDSHAKE state.
 *
 * Setting tls_primary to 1 to put TLS port in the main <ip>:<port>
 * field and put TCP port in aux field, instead of the opposite way.
 *
 * The representation obtained using this function is used for the output
 * of the CLUSTER NODES function, and as format for the cluster
 * configuration file (nodes.conf) for a given node. */
sds clusterGenNodesDescription(client *c, int filter, int tls_primary) {
    sds ci = sdsempty(), ni;
    dictIterator *di;
    dictEntry *de;

    /* Generate all nodes slots info firstly. */
    clusterGenNodesSlotsInfo(filter);

    di = dictGetSafeIterator(server.cluster->nodes);
    while((de = dictNext(di)) != NULL) {
        clusterNode *node = dictGetVal(de);

        if (node->flags & filter) continue;
        ni = clusterGenNodeDescription(c, node, tls_primary);
        ci = sdscatsds(ci,ni);
        sdsfree(ni);
        ci = sdscatlen(ci,"\n",1);

        /* Release slots info. */
        clusterFreeNodesSlotsInfo(node);
    }
    dictReleaseIterator(di);
    return ci;
}

/* Add to the output buffer of the given client the description of the given cluster link.
 * The description is a map with each entry being an attribute of the link. */
void addReplyClusterLinkDescription(client *c, clusterLink *link) {
    addReplyMapLen(c, 6);

    addReplyBulkCString(c, "direction");
    addReplyBulkCString(c, link->inbound ? "from" : "to");

    /* addReplyClusterLinkDescription is only called for links that have been
     * associated with nodes. The association is always bi-directional, so
     * in addReplyClusterLinkDescription, link->node should never be NULL. */
    serverAssert(link->node);
    sds node_name = sdsnewlen(link->node->name, CLUSTER_NAMELEN);
    addReplyBulkCString(c, "node");
    addReplyBulkCString(c, node_name);
    sdsfree(node_name);

    addReplyBulkCString(c, "create-time");
    addReplyLongLong(c, link->ctime);

    char events[3], *p;
    p = events;
    if (link->conn) {
        if (connHasReadHandler(link->conn)) *p++ = 'r';
        if (connHasWriteHandler(link->conn)) *p++ = 'w';
    }
    *p = '\0';
    addReplyBulkCString(c, "events");
    addReplyBulkCString(c, events);

    /* NOTE(review): both fields below report send_msg_queue_mem; confirm the
     * "allocated" vs "used" distinction is intentionally collapsed. */
    addReplyBulkCString(c, "send-buffer-allocated");
    addReplyLongLong(c, link->send_msg_queue_mem);

    addReplyBulkCString(c, "send-buffer-used");
    addReplyLongLong(c, link->send_msg_queue_mem);
}
*/ -void addReplyClusterLinksDescription(client *c) { - dictIterator *di; - dictEntry *de; - void *arraylen_ptr = NULL; - int num_links = 0; - - arraylen_ptr = addReplyDeferredLen(c); - - di = dictGetSafeIterator(server.cluster->nodes); - while((de = dictNext(di)) != NULL) { - clusterNode *node = dictGetVal(de); - if (node->link) { - num_links++; - addReplyClusterLinkDescription(c, node->link); - } - if (node->inbound_link) { - num_links++; - addReplyClusterLinkDescription(c, node->inbound_link); - } - } - dictReleaseIterator(di); - - setDeferredArrayLen(c, arraylen_ptr, num_links); -} - -/* ----------------------------------------------------------------------------- - * CLUSTER command - * -------------------------------------------------------------------------- */ - -const char *getPreferredEndpoint(clusterNode *n) { - switch(server.cluster_preferred_endpoint_type) { - case CLUSTER_ENDPOINT_TYPE_IP: return n->ip; - case CLUSTER_ENDPOINT_TYPE_HOSTNAME: return (sdslen(n->hostname) != 0) ? 
n->hostname : "?"; - case CLUSTER_ENDPOINT_TYPE_UNKNOWN_ENDPOINT: return ""; - } - return "unknown"; -} - -const char *clusterGetMessageTypeString(int type) { - switch(type) { - case CLUSTERMSG_TYPE_PING: return "ping"; - case CLUSTERMSG_TYPE_PONG: return "pong"; - case CLUSTERMSG_TYPE_MEET: return "meet"; - case CLUSTERMSG_TYPE_FAIL: return "fail"; - case CLUSTERMSG_TYPE_PUBLISH: return "publish"; - case CLUSTERMSG_TYPE_PUBLISHSHARD: return "publishshard"; - case CLUSTERMSG_TYPE_FAILOVER_AUTH_REQUEST: return "auth-req"; - case CLUSTERMSG_TYPE_FAILOVER_AUTH_ACK: return "auth-ack"; - case CLUSTERMSG_TYPE_UPDATE: return "update"; - case CLUSTERMSG_TYPE_MFSTART: return "mfstart"; - case CLUSTERMSG_TYPE_MODULE: return "module"; - } - return "unknown"; -} - -int getSlotOrReply(client *c, robj *o) { - long long slot; - - if (getLongLongFromObject(o,&slot) != C_OK || - slot < 0 || slot >= CLUSTER_SLOTS) - { - addReplyError(c,"Invalid or out of range slot"); - return -1; - } - return (int) slot; -} - -/* Returns an indication if the replica node is fully available - * and should be listed in CLUSTER SLOTS response. - * Returns 1 for available nodes, 0 for nodes that have - * not finished their initial sync, in failed state, or are - * otherwise considered not available to serve read commands. */ -static int isReplicaAvailable(clusterNode *node) { - if (nodeFailed(node)) { - return 0; - } - long long repl_offset = node->repl_offset; - if (node->flags & CLUSTER_NODE_MYSELF) { - /* Nodes do not update their own information - * in the cluster node list. 
*/ - repl_offset = replicationGetSlaveOffset(); - } - return (repl_offset != 0); -} - -int checkSlotAssignmentsOrReply(client *c, unsigned char *slots, int del, int start_slot, int end_slot) { - int slot; - for (slot = start_slot; slot <= end_slot; slot++) { - if (del && server.cluster->slots[slot] == NULL) { - addReplyErrorFormat(c,"Slot %d is already unassigned", slot); - return C_ERR; - } else if (!del && server.cluster->slots[slot]) { - addReplyErrorFormat(c,"Slot %d is already busy", slot); - return C_ERR; - } - if (slots[slot]++ == 1) { - addReplyErrorFormat(c,"Slot %d specified multiple times",(int)slot); - return C_ERR; - } - } - return C_OK; -} - -void clusterUpdateSlots(client *c, unsigned char *slots, int del) { - int j; - for (j = 0; j < CLUSTER_SLOTS; j++) { - if (slots[j]) { - int retval; - - /* If this slot was set as importing we can clear this - * state as now we are the real owner of the slot. */ - if (server.cluster->importing_slots_from[j]) - server.cluster->importing_slots_from[j] = NULL; - - retval = del ? clusterDelSlot(j) : - clusterAddSlot(myself,j); - serverAssertWithInfo(c,NULL,retval == C_OK); - } - } -} - -void addNodeToNodeReply(client *c, clusterNode *node) { - addReplyArrayLen(c, 4); - if (server.cluster_preferred_endpoint_type == CLUSTER_ENDPOINT_TYPE_IP) { - addReplyBulkCString(c, node->ip); - } else if (server.cluster_preferred_endpoint_type == CLUSTER_ENDPOINT_TYPE_HOSTNAME) { - if (sdslen(node->hostname) != 0) { - addReplyBulkCBuffer(c, node->hostname, sdslen(node->hostname)); - } else { - addReplyBulkCString(c, "?"); - } - } else if (server.cluster_preferred_endpoint_type == CLUSTER_ENDPOINT_TYPE_UNKNOWN_ENDPOINT) { - addReplyNull(c); - } else { - serverPanic("Unrecognized preferred endpoint type"); - } - - /* Report TLS ports to TLS client, and report non-TLS port to non-TLS client. 
*/ - addReplyLongLong(c, getNodeClientPort(node, connIsTLS(c->conn))); - addReplyBulkCBuffer(c, node->name, CLUSTER_NAMELEN); - - /* Add the additional endpoint information, this is all the known networking information - * that is not the preferred endpoint. Note the logic is evaluated twice so we can - * correctly report the number of additional network arguments without using a deferred - * map, an assertion is made at the end to check we set the right length. */ - int length = 0; - if (server.cluster_preferred_endpoint_type != CLUSTER_ENDPOINT_TYPE_IP) { - length++; - } - if (server.cluster_preferred_endpoint_type != CLUSTER_ENDPOINT_TYPE_HOSTNAME - && sdslen(node->hostname) != 0) - { - length++; - } - addReplyMapLen(c, length); - - if (server.cluster_preferred_endpoint_type != CLUSTER_ENDPOINT_TYPE_IP) { - addReplyBulkCString(c, "ip"); - addReplyBulkCString(c, node->ip); - length--; - } - if (server.cluster_preferred_endpoint_type != CLUSTER_ENDPOINT_TYPE_HOSTNAME - && sdslen(node->hostname) != 0) - { - addReplyBulkCString(c, "hostname"); - addReplyBulkCBuffer(c, node->hostname, sdslen(node->hostname)); - length--; - } - serverAssert(length == 0); -} - -void addNodeReplyForClusterSlot(client *c, clusterNode *node, int start_slot, int end_slot) { - int i, nested_elements = 3; /* slots (2) + master addr (1) */ - for (i = 0; i < node->numslaves; i++) { - if (!isReplicaAvailable(node->slaves[i])) continue; - nested_elements++; - } - addReplyArrayLen(c, nested_elements); - addReplyLongLong(c, start_slot); - addReplyLongLong(c, end_slot); - addNodeToNodeReply(c, node); - - /* Remaining nodes in reply are replicas for slot range */ - for (i = 0; i < node->numslaves; i++) { - /* This loop is copy/pasted from clusterGenNodeDescription() - * with modifications for per-slot node aggregation. 
*/ - if (!isReplicaAvailable(node->slaves[i])) continue; - addNodeToNodeReply(c, node->slaves[i]); - nested_elements--; - } - serverAssert(nested_elements == 3); /* Original 3 elements */ -} - -/* Add detailed information of a node to the output buffer of the given client. */ -void addNodeDetailsToShardReply(client *c, clusterNode *node) { - int reply_count = 0; - void *node_replylen = addReplyDeferredLen(c); - addReplyBulkCString(c, "id"); - addReplyBulkCBuffer(c, node->name, CLUSTER_NAMELEN); - reply_count++; - - if (node->tcp_port) { - addReplyBulkCString(c, "port"); - addReplyLongLong(c, node->tcp_port); - reply_count++; - } - - if (node->tls_port) { - addReplyBulkCString(c, "tls-port"); - addReplyLongLong(c, node->tls_port); - reply_count++; - } - - addReplyBulkCString(c, "ip"); - addReplyBulkCString(c, node->ip); - reply_count++; - - addReplyBulkCString(c, "endpoint"); - addReplyBulkCString(c, getPreferredEndpoint(node)); - reply_count++; - - if (sdslen(node->hostname) != 0) { - addReplyBulkCString(c, "hostname"); - addReplyBulkCBuffer(c, node->hostname, sdslen(node->hostname)); - reply_count++; - } - - long long node_offset; - if (node->flags & CLUSTER_NODE_MYSELF) { - node_offset = nodeIsSlave(node) ? replicationGetSlaveOffset() : server.master_repl_offset; - } else { - node_offset = node->repl_offset; - } - - addReplyBulkCString(c, "role"); - addReplyBulkCString(c, nodeIsSlave(node) ? "replica" : "master"); - reply_count++; - - addReplyBulkCString(c, "replication-offset"); - addReplyLongLong(c, node_offset); - reply_count++; - - addReplyBulkCString(c, "health"); - const char *health_msg = NULL; - if (nodeFailed(node)) { - health_msg = "fail"; - } else if (nodeIsSlave(node) && node_offset == 0) { - health_msg = "loading"; - } else { - health_msg = "online"; - } - addReplyBulkCString(c, health_msg); - reply_count++; - - setDeferredMapLen(c, node_replylen, reply_count); -} - -/* Add the shard reply of a single shard based off the given primary node. 
*/ -void addShardReplyForClusterShards(client *c, list *nodes) { - serverAssert(listLength(nodes) > 0); - clusterNode *n = listNodeValue(listFirst(nodes)); - addReplyMapLen(c, 2); - addReplyBulkCString(c, "slots"); - - /* Use slot_info_pairs from the primary only */ - while (n->slaveof != NULL) n = n->slaveof; - - if (n->slot_info_pairs != NULL) { - serverAssert((n->slot_info_pairs_count % 2) == 0); - addReplyArrayLen(c, n->slot_info_pairs_count); - for (int i = 0; i < n->slot_info_pairs_count; i++) - addReplyBulkLongLong(c, (unsigned long)n->slot_info_pairs[i]); - } else { - /* If no slot info pair is provided, the node owns no slots */ - addReplyArrayLen(c, 0); - } - - addReplyBulkCString(c, "nodes"); - addReplyArrayLen(c, listLength(nodes)); - listIter li; - listRewind(nodes, &li); - for (listNode *ln = listNext(&li); ln != NULL; ln = listNext(&li)) { - clusterNode *n = listNodeValue(ln); - addNodeDetailsToShardReply(c, n); - clusterFreeNodesSlotsInfo(n); - } -} - -/* Add to the output buffer of the given client, an array of slot (start, end) - * pair owned by the shard, also the primary and set of replica(s) along with - * information about each node. */ -void clusterReplyShards(client *c) { - addReplyArrayLen(c, dictSize(server.cluster->shards)); - /* This call will add slot_info_pairs to all nodes */ - clusterGenNodesSlotsInfo(0); - dictIterator *di = dictGetSafeIterator(server.cluster->shards); - for(dictEntry *de = dictNext(di); de != NULL; de = dictNext(di)) { - addShardReplyForClusterShards(c, dictGetVal(de)); - } - dictReleaseIterator(di); -} - -void clusterReplyMultiBulkSlots(client * c) { - /* Format: 1) 1) start slot - * 2) end slot - * 3) 1) master IP - * 2) master port - * 3) node ID - * 4) 1) replica IP - * 2) replica port - * 3) node ID - * ... 
continued until done - */ - clusterNode *n = NULL; - int num_masters = 0, start = -1; - void *slot_replylen = addReplyDeferredLen(c); - - for (int i = 0; i <= CLUSTER_SLOTS; i++) { - /* Find start node and slot id. */ - if (n == NULL) { - if (i == CLUSTER_SLOTS) break; - n = server.cluster->slots[i]; - start = i; - continue; - } - - /* Add cluster slots info when occur different node with start - * or end of slot. */ - if (i == CLUSTER_SLOTS || n != server.cluster->slots[i]) { - addNodeReplyForClusterSlot(c, n, start, i-1); - num_masters++; - if (i == CLUSTER_SLOTS) break; - n = server.cluster->slots[i]; - start = i; - } - } - setDeferredArrayLen(c, slot_replylen, num_masters); -} - -sds genClusterInfoString(void) { - sds info = sdsempty(); - char *statestr[] = {"ok","fail"}; - int slots_assigned = 0, slots_ok = 0, slots_pfail = 0, slots_fail = 0; - uint64_t myepoch; - int j; - - for (j = 0; j < CLUSTER_SLOTS; j++) { - clusterNode *n = server.cluster->slots[j]; - - if (n == NULL) continue; - slots_assigned++; - if (nodeFailed(n)) { - slots_fail++; - } else if (nodeTimedOut(n)) { - slots_pfail++; - } else { - slots_ok++; - } - } - - myepoch = (nodeIsSlave(myself) && myself->slaveof) ? - myself->slaveof->configEpoch : myself->configEpoch; - - info = sdscatprintf(info, - "cluster_state:%s\r\n" - "cluster_slots_assigned:%d\r\n" - "cluster_slots_ok:%d\r\n" - "cluster_slots_pfail:%d\r\n" - "cluster_slots_fail:%d\r\n" - "cluster_known_nodes:%lu\r\n" - "cluster_size:%d\r\n" - "cluster_current_epoch:%llu\r\n" - "cluster_my_epoch:%llu\r\n" - , statestr[server.cluster->state], - slots_assigned, - slots_ok, - slots_pfail, - slots_fail, - dictSize(server.cluster->nodes), - server.cluster->size, - (unsigned long long) server.cluster->currentEpoch, - (unsigned long long) myepoch - ); - - /* Show stats about messages sent and received. 
*/ - long long tot_msg_sent = 0; - long long tot_msg_received = 0; - - for (int i = 0; i < CLUSTERMSG_TYPE_COUNT; i++) { - if (server.cluster->stats_bus_messages_sent[i] == 0) continue; - tot_msg_sent += server.cluster->stats_bus_messages_sent[i]; - info = sdscatprintf(info, - "cluster_stats_messages_%s_sent:%lld\r\n", - clusterGetMessageTypeString(i), - server.cluster->stats_bus_messages_sent[i]); - } - info = sdscatprintf(info, - "cluster_stats_messages_sent:%lld\r\n", tot_msg_sent); - - for (int i = 0; i < CLUSTERMSG_TYPE_COUNT; i++) { - if (server.cluster->stats_bus_messages_received[i] == 0) continue; - tot_msg_received += server.cluster->stats_bus_messages_received[i]; - info = sdscatprintf(info, - "cluster_stats_messages_%s_received:%lld\r\n", - clusterGetMessageTypeString(i), - server.cluster->stats_bus_messages_received[i]); - } - info = sdscatprintf(info, - "cluster_stats_messages_received:%lld\r\n", tot_msg_received); - - info = sdscatprintf(info, - "total_cluster_links_buffer_limit_exceeded:%llu\r\n", - server.cluster->stat_cluster_links_buffer_limit_exceeded); - - return info; -} - -void clusterCommand(client *c) { - if (server.cluster_enabled == 0) { - addReplyError(c,"This instance has cluster support disabled"); - return; - } - - if (c->argc == 2 && !strcasecmp(c->argv[1]->ptr,"help")) { - const char *help[] = { -"ADDSLOTS [ ...]", -" Assign slots to current node.", -"ADDSLOTSRANGE [ ...]", -" Assign slots which are between and to current node.", -"BUMPEPOCH", -" Advance the cluster config epoch.", -"COUNT-FAILURE-REPORTS ", -" Return number of failure reports for .", -"COUNTKEYSINSLOT ", -" Return the number of keys in .", -"DELSLOTS [ ...]", -" Delete slots information from current node.", -"DELSLOTSRANGE [ ...]", -" Delete slots information which are between and from current node.", -"FAILOVER [FORCE|TAKEOVER]", -" Promote current replica node to being a master.", -"FORGET ", -" Remove a node from the cluster.", -"GETKEYSINSLOT ", -" Return key 
names stored by current node in a slot.", -"FLUSHSLOTS", -" Delete current node own slots information.", -"INFO", -" Return information about the cluster.", -"KEYSLOT ", -" Return the hash slot for .", -"MEET []", -" Connect nodes into a working cluster.", -"MYID", -" Return the node id.", -"MYSHARDID", -" Return the node's shard id.", -"NODES", -" Return cluster configuration seen by node. Output format:", -" ...", -"REPLICATE ", -" Configure current node as replica to .", -"RESET [HARD|SOFT]", -" Reset current node (default: soft).", -"SET-CONFIG-EPOCH ", -" Set config epoch of current node.", -"SETSLOT (IMPORTING |MIGRATING |STABLE|NODE )", -" Set slot state.", -"REPLICAS ", -" Return replicas.", -"SAVECONFIG", -" Force saving cluster configuration on disk.", -"SLOTS", -" Return information about slots range mappings. Each range is made of:", -" start, end, master and replicas IP addresses, ports and ids", -"SHARDS", -" Return information about slot range mappings and the nodes associated with them.", -"LINKS", -" Return information about all network links between this node and its peers.", -" Output format is an array where each array element is a map containing attributes of a link", -NULL - }; - addReplyHelp(c, help); - } else if (!strcasecmp(c->argv[1]->ptr,"meet") && (c->argc == 4 || c->argc == 5)) { - /* CLUSTER MEET [cport] */ - long long port, cport; - - if (getLongLongFromObject(c->argv[3], &port) != C_OK) { - addReplyErrorFormat(c,"Invalid base port specified: %s", - (char*)c->argv[3]->ptr); - return; - } - - if (c->argc == 5) { - if (getLongLongFromObject(c->argv[4], &cport) != C_OK) { - addReplyErrorFormat(c,"Invalid bus port specified: %s", - (char*)c->argv[4]->ptr); - return; - } - } else { - cport = port + CLUSTER_PORT_INCR; - } - - if (clusterStartHandshake(c->argv[2]->ptr,port,cport) == 0 && - errno == EINVAL) - { - addReplyErrorFormat(c,"Invalid node address specified: %s:%s", - (char*)c->argv[2]->ptr, (char*)c->argv[3]->ptr); - } else { - 
addReply(c,shared.ok); - } - } else if (!strcasecmp(c->argv[1]->ptr,"nodes") && c->argc == 2) { - /* CLUSTER NODES */ - /* Report TLS ports to TLS client, and report non-TLS port to non-TLS client. */ - sds nodes = clusterGenNodesDescription(c, 0, connIsTLS(c->conn)); - addReplyVerbatim(c,nodes,sdslen(nodes),"txt"); - sdsfree(nodes); - } else if (!strcasecmp(c->argv[1]->ptr,"myid") && c->argc == 2) { - /* CLUSTER MYID */ - addReplyBulkCBuffer(c,myself->name, CLUSTER_NAMELEN); - } else if (!strcasecmp(c->argv[1]->ptr,"myshardid") && c->argc == 2) { - /* CLUSTER MYSHARDID */ - addReplyBulkCBuffer(c,myself->shard_id, CLUSTER_NAMELEN); - } else if (!strcasecmp(c->argv[1]->ptr,"slots") && c->argc == 2) { - /* CLUSTER SLOTS */ - clusterReplyMultiBulkSlots(c); - } else if (!strcasecmp(c->argv[1]->ptr,"shards") && c->argc == 2) { - /* CLUSTER SHARDS */ - clusterReplyShards(c); - } else if (!strcasecmp(c->argv[1]->ptr,"flushslots") && c->argc == 2) { - /* CLUSTER FLUSHSLOTS */ - if (dictSize(server.db[0].dict) != 0) { - addReplyError(c,"DB must be empty to perform CLUSTER FLUSHSLOTS."); - return; - } - clusterDelNodeSlots(myself); - clusterDoBeforeSleep(CLUSTER_TODO_UPDATE_STATE|CLUSTER_TODO_SAVE_CONFIG); - addReply(c,shared.ok); - } else if ((!strcasecmp(c->argv[1]->ptr,"addslots") || - !strcasecmp(c->argv[1]->ptr,"delslots")) && c->argc >= 3) - { - /* CLUSTER ADDSLOTS [slot] ... */ - /* CLUSTER DELSLOTS [slot] ... */ - int j, slot; - unsigned char *slots = zmalloc(CLUSTER_SLOTS); - int del = !strcasecmp(c->argv[1]->ptr,"delslots"); - - memset(slots,0,CLUSTER_SLOTS); - /* Check that all the arguments are parseable.*/ - for (j = 2; j < c->argc; j++) { - if ((slot = getSlotOrReply(c,c->argv[j])) == C_ERR) { - zfree(slots); - return; - } - } - /* Check that the slots are not already busy. 
*/ - for (j = 2; j < c->argc; j++) { - slot = getSlotOrReply(c,c->argv[j]); - if (checkSlotAssignmentsOrReply(c, slots, del, slot, slot) == C_ERR) { - zfree(slots); - return; - } - } - clusterUpdateSlots(c, slots, del); - zfree(slots); - clusterDoBeforeSleep(CLUSTER_TODO_UPDATE_STATE|CLUSTER_TODO_SAVE_CONFIG); - addReply(c,shared.ok); - } else if ((!strcasecmp(c->argv[1]->ptr,"addslotsrange") || - !strcasecmp(c->argv[1]->ptr,"delslotsrange")) && c->argc >= 4) { - if (c->argc % 2 == 1) { - addReplyErrorArity(c); - return; - } - /* CLUSTER ADDSLOTSRANGE [ ...] */ - /* CLUSTER DELSLOTSRANGE [ ...] */ - int j, startslot, endslot; - unsigned char *slots = zmalloc(CLUSTER_SLOTS); - int del = !strcasecmp(c->argv[1]->ptr,"delslotsrange"); - - memset(slots,0,CLUSTER_SLOTS); - /* Check that all the arguments are parseable and that all the - * slots are not already busy. */ - for (j = 2; j < c->argc; j += 2) { - if ((startslot = getSlotOrReply(c,c->argv[j])) == C_ERR) { - zfree(slots); - return; - } - if ((endslot = getSlotOrReply(c,c->argv[j+1])) == C_ERR) { - zfree(slots); - return; - } - if (startslot > endslot) { - addReplyErrorFormat(c,"start slot number %d is greater than end slot number %d", startslot, endslot); - zfree(slots); - return; - } - - if (checkSlotAssignmentsOrReply(c, slots, del, startslot, endslot) == C_ERR) { - zfree(slots); - return; - } - } - clusterUpdateSlots(c, slots, del); - zfree(slots); - clusterDoBeforeSleep(CLUSTER_TODO_UPDATE_STATE|CLUSTER_TODO_SAVE_CONFIG); - addReply(c,shared.ok); - } else if (!strcasecmp(c->argv[1]->ptr,"setslot") && c->argc >= 4) { - /* SETSLOT 10 MIGRATING */ - /* SETSLOT 10 IMPORTING */ - /* SETSLOT 10 STABLE */ - /* SETSLOT 10 NODE */ - int slot; - clusterNode *n; - - if (nodeIsSlave(myself)) { - addReplyError(c,"Please use SETSLOT only with masters."); - return; - } - - if ((slot = getSlotOrReply(c,c->argv[2])) == -1) return; - - if (!strcasecmp(c->argv[3]->ptr,"migrating") && c->argc == 5) { - if 
(server.cluster->slots[slot] != myself) { - addReplyErrorFormat(c,"I'm not the owner of hash slot %u",slot); - return; - } - n = clusterLookupNode(c->argv[4]->ptr, sdslen(c->argv[4]->ptr)); - if (n == NULL) { - addReplyErrorFormat(c,"I don't know about node %s", - (char*)c->argv[4]->ptr); - return; - } - if (nodeIsSlave(n)) { - addReplyError(c,"Target node is not a master"); - return; - } - server.cluster->migrating_slots_to[slot] = n; - } else if (!strcasecmp(c->argv[3]->ptr,"importing") && c->argc == 5) { - if (server.cluster->slots[slot] == myself) { - addReplyErrorFormat(c, - "I'm already the owner of hash slot %u",slot); - return; - } - n = clusterLookupNode(c->argv[4]->ptr, sdslen(c->argv[4]->ptr)); - if (n == NULL) { - addReplyErrorFormat(c,"I don't know about node %s", - (char*)c->argv[4]->ptr); - return; - } - if (nodeIsSlave(n)) { - addReplyError(c,"Target node is not a master"); - return; - } - server.cluster->importing_slots_from[slot] = n; - } else if (!strcasecmp(c->argv[3]->ptr,"stable") && c->argc == 4) { - /* CLUSTER SETSLOT STABLE */ - server.cluster->importing_slots_from[slot] = NULL; - server.cluster->migrating_slots_to[slot] = NULL; - } else if (!strcasecmp(c->argv[3]->ptr,"node") && c->argc == 5) { - /* CLUSTER SETSLOT NODE */ - n = clusterLookupNode(c->argv[4]->ptr, sdslen(c->argv[4]->ptr)); - if (!n) { - addReplyErrorFormat(c,"Unknown node %s", - (char*)c->argv[4]->ptr); - return; - } - if (nodeIsSlave(n)) { - addReplyError(c,"Target node is not a master"); - return; - } - /* If this hash slot was served by 'myself' before to switch - * make sure there are no longer local keys for this hash slot. 
*/ - if (server.cluster->slots[slot] == myself && n != myself) { - if (countKeysInSlot(slot) != 0) { - addReplyErrorFormat(c, - "Can't assign hashslot %d to a different node " - "while I still hold keys for this hash slot.", slot); - return; - } - } - /* If this slot is in migrating status but we have no keys - * for it assigning the slot to another node will clear - * the migrating status. */ - if (countKeysInSlot(slot) == 0 && - server.cluster->migrating_slots_to[slot]) - server.cluster->migrating_slots_to[slot] = NULL; - - int slot_was_mine = server.cluster->slots[slot] == myself; - clusterDelSlot(slot); - clusterAddSlot(n,slot); - - /* If we are a master left without slots, we should turn into a - * replica of the new master. */ - if (slot_was_mine && - n != myself && - myself->numslots == 0 && - server.cluster_allow_replica_migration) - { - serverLog(LL_NOTICE, - "Configuration change detected. Reconfiguring myself " - "as a replica of %.40s (%s)", n->name, n->human_nodename); - clusterSetMaster(n); - clusterDoBeforeSleep(CLUSTER_TODO_SAVE_CONFIG | - CLUSTER_TODO_UPDATE_STATE | - CLUSTER_TODO_FSYNC_CONFIG); - } - - /* If this node was importing this slot, assigning the slot to - * itself also clears the importing status. */ - if (n == myself && - server.cluster->importing_slots_from[slot]) - { - /* This slot was manually migrated, set this node configEpoch - * to a new epoch so that the new version can be propagated - * by the cluster. - * - * Note that if this ever results in a collision with another - * node getting the same configEpoch, for example because a - * failover happens at the same time we close the slot, the - * configEpoch collision resolution will fix it assigning - * a different epoch to each node. 
*/ - if (clusterBumpConfigEpochWithoutConsensus() == C_OK) { - serverLog(LL_NOTICE, - "configEpoch updated after importing slot %d", slot); - } - server.cluster->importing_slots_from[slot] = NULL; - /* After importing this slot, let the other nodes know as - * soon as possible. */ - clusterBroadcastPong(CLUSTER_BROADCAST_ALL); - } - } else { - addReplyError(c, - "Invalid CLUSTER SETSLOT action or number of arguments. Try CLUSTER HELP"); - return; - } - clusterDoBeforeSleep(CLUSTER_TODO_SAVE_CONFIG|CLUSTER_TODO_UPDATE_STATE); - addReply(c,shared.ok); - } else if (!strcasecmp(c->argv[1]->ptr,"bumpepoch") && c->argc == 2) { - /* CLUSTER BUMPEPOCH */ - int retval = clusterBumpConfigEpochWithoutConsensus(); - sds reply = sdscatprintf(sdsempty(),"+%s %llu\r\n", - (retval == C_OK) ? "BUMPED" : "STILL", - (unsigned long long) myself->configEpoch); - addReplySds(c,reply); - } else if (!strcasecmp(c->argv[1]->ptr,"info") && c->argc == 2) { - /* CLUSTER INFO */ - - sds info = genClusterInfoString(); - - /* Produce the reply protocol. 
*/ - addReplyVerbatim(c,info,sdslen(info),"txt"); - sdsfree(info); - } else if (!strcasecmp(c->argv[1]->ptr,"saveconfig") && c->argc == 2) { - int retval = clusterSaveConfig(1); - - if (retval == 0) - addReply(c,shared.ok); - else - addReplyErrorFormat(c,"error saving the cluster node config: %s", - strerror(errno)); - } else if (!strcasecmp(c->argv[1]->ptr,"keyslot") && c->argc == 3) { - /* CLUSTER KEYSLOT */ - sds key = c->argv[2]->ptr; - - addReplyLongLong(c,keyHashSlot(key,sdslen(key))); - } else if (!strcasecmp(c->argv[1]->ptr,"countkeysinslot") && c->argc == 3) { - /* CLUSTER COUNTKEYSINSLOT */ - long long slot; - - if (getLongLongFromObjectOrReply(c,c->argv[2],&slot,NULL) != C_OK) - return; - if (slot < 0 || slot >= CLUSTER_SLOTS) { - addReplyError(c,"Invalid slot"); - return; - } - addReplyLongLong(c,countKeysInSlot(slot)); - } else if (!strcasecmp(c->argv[1]->ptr,"getkeysinslot") && c->argc == 4) { - /* CLUSTER GETKEYSINSLOT */ - long long maxkeys, slot; - - if (getLongLongFromObjectOrReply(c,c->argv[2],&slot,NULL) != C_OK) - return; - if (getLongLongFromObjectOrReply(c,c->argv[3],&maxkeys,NULL) - != C_OK) - return; - if (slot < 0 || slot >= CLUSTER_SLOTS || maxkeys < 0) { - addReplyError(c,"Invalid slot or number of keys"); - return; - } - - unsigned int keys_in_slot = countKeysInSlot(slot); - unsigned int numkeys = maxkeys > keys_in_slot ? keys_in_slot : maxkeys; - addReplyArrayLen(c,numkeys); - dictEntry *de = (*server.db->slots_to_keys).by_slot[slot].head; - for (unsigned int j = 0; j < numkeys; j++) { - serverAssert(de != NULL); - sds sdskey = dictGetKey(de); - addReplyBulkCBuffer(c, sdskey, sdslen(sdskey)); - de = dictEntryNextInSlot(de); - } - } else if (!strcasecmp(c->argv[1]->ptr,"forget") && c->argc == 3) { - /* CLUSTER FORGET */ - clusterNode *n = clusterLookupNode(c->argv[2]->ptr, sdslen(c->argv[2]->ptr)); - if (!n) { - if (clusterBlacklistExists((char*)c->argv[2]->ptr)) - /* Already forgotten. 
The deletion may have been gossipped by - * another node, so we pretend it succeeded. */ - addReply(c,shared.ok); - else - addReplyErrorFormat(c,"Unknown node %s", (char*)c->argv[2]->ptr); - return; - } else if (n == myself) { - addReplyError(c,"I tried hard but I can't forget myself..."); - return; - } else if (nodeIsSlave(myself) && myself->slaveof == n) { - addReplyError(c,"Can't forget my master!"); - return; - } - clusterBlacklistAddNode(n); - clusterDelNode(n); - clusterDoBeforeSleep(CLUSTER_TODO_UPDATE_STATE| - CLUSTER_TODO_SAVE_CONFIG); - addReply(c,shared.ok); - } else if (!strcasecmp(c->argv[1]->ptr,"replicate") && c->argc == 3) { - /* CLUSTER REPLICATE */ - /* Lookup the specified node in our table. */ - clusterNode *n = clusterLookupNode(c->argv[2]->ptr, sdslen(c->argv[2]->ptr)); - if (!n) { - addReplyErrorFormat(c,"Unknown node %s", (char*)c->argv[2]->ptr); - return; - } - - /* I can't replicate myself. */ - if (n == myself) { - addReplyError(c,"Can't replicate myself"); - return; - } +/* + * Copyright (c) 2009-Present, Redis Ltd. + * All rights reserved. + * + * Licensed under your choice of the Redis Source Available License 2.0 + * (RSALv2) or the Server Side Public License v1 (SSPLv1). + * + * Portions of this file are available under BSD3 terms; see REDISCONTRIBUTIONS for more information. + */ - /* Can't replicate a slave. */ - if (nodeIsSlave(n)) { - addReplyError(c,"I can only replicate a master, not a replica."); - return; - } +/* + * cluster.c contains the common parts of a clustering + * implementation, the parts that are shared between + * any implementation of Redis clustering. + */ - /* If the instance is currently a master, it should have no assigned - * slots nor keys to accept to replicate some other node. - * Slaves can switch to another master without issues. 
*/ - if (nodeIsMaster(myself) && - (myself->numslots != 0 || dictSize(server.db[0].dict) != 0)) { - addReplyError(c, - "To set a master the node must be empty and " - "without assigned slots."); - return; - } +#include "server.h" +#include "cluster.h" - /* Set the master. */ - clusterSetMaster(n); - clusterDoBeforeSleep(CLUSTER_TODO_UPDATE_STATE|CLUSTER_TODO_SAVE_CONFIG); - addReply(c,shared.ok); - } else if ((!strcasecmp(c->argv[1]->ptr,"slaves") || - !strcasecmp(c->argv[1]->ptr,"replicas")) && c->argc == 3) { - /* CLUSTER SLAVES */ - clusterNode *n = clusterLookupNode(c->argv[2]->ptr, sdslen(c->argv[2]->ptr)); - int j; +#include - /* Lookup the specified node in our table. */ - if (!n) { - addReplyErrorFormat(c,"Unknown node %s", (char*)c->argv[2]->ptr); - return; - } +/* ----------------------------------------------------------------------------- + * Key space handling + * -------------------------------------------------------------------------- */ - if (nodeIsSlave(n)) { - addReplyError(c,"The specified node is not a master"); - return; - } +/* We have 16384 hash slots. The hash slot of a given key is obtained + * as the least significant 14 bits of the crc16 of the key. + * + * However, if the key contains the {...} pattern, only the part between + * { and } is hashed. This may be useful in the future to force certain + * keys to be in the same node (assuming no resharding is in progress). */ +unsigned int keyHashSlot(char *key, int keylen) { + int s, e; /* start-end indexes of { and } */ - /* Report TLS ports to TLS client, and report non-TLS port to non-TLS client. 
*/ - addReplyArrayLen(c,n->numslaves); - for (j = 0; j < n->numslaves; j++) { - sds ni = clusterGenNodeDescription(c, n->slaves[j], connIsTLS(c->conn)); - addReplyBulkCString(c,ni); - sdsfree(ni); - } - } else if (!strcasecmp(c->argv[1]->ptr,"count-failure-reports") && - c->argc == 3) - { - /* CLUSTER COUNT-FAILURE-REPORTS */ - clusterNode *n = clusterLookupNode(c->argv[2]->ptr, sdslen(c->argv[2]->ptr)); + for (s = 0; s < keylen; s++) + if (key[s] == '{') break; - if (!n) { - addReplyErrorFormat(c,"Unknown node %s", (char*)c->argv[2]->ptr); - return; - } else { - addReplyLongLong(c,clusterNodeFailureReportsCount(n)); - } - } else if (!strcasecmp(c->argv[1]->ptr,"failover") && - (c->argc == 2 || c->argc == 3)) - { - /* CLUSTER FAILOVER [FORCE|TAKEOVER] */ - int force = 0, takeover = 0; - - if (c->argc == 3) { - if (!strcasecmp(c->argv[2]->ptr,"force")) { - force = 1; - } else if (!strcasecmp(c->argv[2]->ptr,"takeover")) { - takeover = 1; - force = 1; /* Takeover also implies force. */ - } else { - addReplyErrorObject(c,shared.syntaxerr); - return; - } - } + /* No '{' ? Hash the whole key. This is the base case. */ + if (s == keylen) return crc16(key,keylen) & 0x3FFF; - /* Check preconditions. */ - if (nodeIsMaster(myself)) { - addReplyError(c,"You should send CLUSTER FAILOVER to a replica"); - return; - } else if (myself->slaveof == NULL) { - addReplyError(c,"I'm a replica but my master is unknown to me"); - return; - } else if (!force && - (nodeFailed(myself->slaveof) || - myself->slaveof->link == NULL)) - { - addReplyError(c,"Master is down or failed, " - "please use CLUSTER FAILOVER FORCE"); - return; - } - resetManualFailover(); - server.cluster->mf_end = mstime() + CLUSTER_MF_TIMEOUT; - - if (takeover) { - /* A takeover does not perform any initial check. It just - * generates a new configuration epoch for this node without - * consensus, claims the master's slots, and broadcast the new - * configuration. 
*/ - serverLog(LL_NOTICE,"Taking over the master (user request)."); - clusterBumpConfigEpochWithoutConsensus(); - clusterFailoverReplaceYourMaster(); - } else if (force) { - /* If this is a forced failover, we don't need to talk with our - * master to agree about the offset. We just failover taking over - * it without coordination. */ - serverLog(LL_NOTICE,"Forced failover user request accepted."); - server.cluster->mf_can_start = 1; - } else { - serverLog(LL_NOTICE,"Manual failover user request accepted."); - clusterSendMFStart(myself->slaveof); - } - addReply(c,shared.ok); - } else if (!strcasecmp(c->argv[1]->ptr,"set-config-epoch") && c->argc == 3) - { - /* CLUSTER SET-CONFIG-EPOCH - * - * The user is allowed to set the config epoch only when a node is - * totally fresh: no config epoch, no other known node, and so forth. - * This happens at cluster creation time to start with a cluster where - * every node has a different node ID, without to rely on the conflicts - * resolution system which is too slow when a big cluster is created. */ - long long epoch; - - if (getLongLongFromObjectOrReply(c,c->argv[2],&epoch,NULL) != C_OK) - return; + /* '{' found? Check if we have the corresponding '}'. 
*/ + for (e = s+1; e < keylen; e++) + if (key[e] == '}') break; - if (epoch < 0) { - addReplyErrorFormat(c,"Invalid config epoch specified: %lld",epoch); - } else if (dictSize(server.cluster->nodes) > 1) { - addReplyError(c,"The user can assign a config epoch only when the " - "node does not know any other node."); - } else if (myself->configEpoch != 0) { - addReplyError(c,"Node config epoch is already non-zero"); - } else { - myself->configEpoch = epoch; - serverLog(LL_NOTICE, - "configEpoch set to %llu via CLUSTER SET-CONFIG-EPOCH", - (unsigned long long) myself->configEpoch); - - if (server.cluster->currentEpoch < (uint64_t)epoch) - server.cluster->currentEpoch = epoch; - /* No need to fsync the config here since in the unlucky event - * of a failure to persist the config, the conflict resolution code - * will assign a unique config to this node. */ - clusterDoBeforeSleep(CLUSTER_TODO_UPDATE_STATE| - CLUSTER_TODO_SAVE_CONFIG); - addReply(c,shared.ok); - } - } else if (!strcasecmp(c->argv[1]->ptr,"reset") && - (c->argc == 2 || c->argc == 3)) - { - /* CLUSTER RESET [SOFT|HARD] */ - int hard = 0; - - /* Parse soft/hard argument. Default is soft. */ - if (c->argc == 3) { - if (!strcasecmp(c->argv[2]->ptr,"hard")) { - hard = 1; - } else if (!strcasecmp(c->argv[2]->ptr,"soft")) { - hard = 0; - } else { - addReplyErrorObject(c,shared.syntaxerr); - return; - } - } + /* No '}' or nothing between {} ? Hash the whole key. */ + if (e == keylen || e == s+1) return crc16(key,keylen) & 0x3FFF; - /* Slaves can be reset while containing data, but not master nodes - * that must be empty. 
*/ - if (nodeIsMaster(myself) && dictSize(c->db->dict) != 0) { - addReplyError(c,"CLUSTER RESET can't be called with " - "master nodes containing keys"); - return; - } - clusterReset(hard); - addReply(c,shared.ok); - } else if (!strcasecmp(c->argv[1]->ptr,"links") && c->argc == 2) { - /* CLUSTER LINKS */ - addReplyClusterLinksDescription(c); - } else { - addReplySubcommandSyntaxError(c); - return; - } + /* If we are here there is both a { and a } on its right. Hash + * what is in the middle between { and }. */ + return crc16(key+s+1,e-s-1) & 0x3FFF; } -void removeChannelsInSlot(unsigned int slot) { - unsigned int channelcount = countChannelsInSlot(slot); - if (channelcount == 0) return; - - /* Retrieve all the channels for the slot. */ - robj **channels = zmalloc(sizeof(robj*)*channelcount); - raxIterator iter; - int j = 0; - unsigned char indexed[2]; - - indexed[0] = (slot >> 8) & 0xff; - indexed[1] = slot & 0xff; - raxStart(&iter,server.cluster->slots_to_channels); - raxSeek(&iter,">=",indexed,2); - while(raxNext(&iter)) { - if (iter.key[0] != indexed[0] || iter.key[1] != indexed[1]) break; - channels[j++] = createStringObject((char*)iter.key + 2, iter.key_len - 2); +/* If it can be inferred that the given glob-style pattern, as implemented in + * stringmatchlen() in util.c, only can match keys belonging to a single slot, + * that slot is returned. Otherwise -1 is returned. */ +int patternHashSlot(char *pattern, int length) { + int s = -1; /* index of the first '{' */ + + for (int i = 0; i < length; i++) { + if (pattern[i] == '*' || pattern[i] == '?' || pattern[i] == '[') { + /* Wildcard or character class found. Keys can be in any slot. */ + return -1; + } else if (pattern[i] == '\\') { + /* Escaped character. Computing slot in this case is not + * implemented. We would need a temp buffer. */ + return -1; + } else if (s == -1 && pattern[i] == '{') { + /* Opening brace '{' found. 
*/ + s = i; + } else if (s >= 0 && pattern[i] == '}' && i == s + 1) { + /* Empty tag '{}' found. The whole key is hashed. Ignore braces. */ + s = -2; + } else if (s >= 0 && pattern[i] == '}') { + /* Non-empty tag '{...}' found. Hash what's between braces. */ + return crc16(pattern + s + 1, i - s - 1) & 0x3FFF; + } + } + + /* The pattern matches a single key. Hash the whole pattern. */ + return crc16(pattern, length) & 0x3FFF; +} + +ConnectionType *connTypeOfCluster(void) { + if (server.tls_cluster) { + return connectionTypeTls(); } - raxStop(&iter); - pubsubUnsubscribeShardChannels(channels, channelcount); - zfree(channels); + return connectionTypeTcp(); } /* ----------------------------------------------------------------------------- @@ -6580,7 +192,7 @@ void restoreCommand(client *c) { lfu_freq == -1) { if (getLongLongFromObjectOrReply(c,c->argv[j+1],&lru_idle,NULL) - != C_OK) return; + != C_OK) return; if (lru_idle < 0) { addReplyError(c,"Invalid IDLETIME value, must be >= 0"); return; @@ -6591,7 +203,7 @@ void restoreCommand(client *c) { lru_idle == -1) { if (getLongLongFromObjectOrReply(c,c->argv[j+1],&lfu_freq,NULL) - != C_OK) return; + != C_OK) return; if (lfu_freq < 0 || lfu_freq > 255) { addReplyError(c,"Invalid FREQ value, must be >= 0 and <= 255"); return; @@ -6653,7 +265,16 @@ void restoreCommand(client *c) { } /* Create the key and set the TTL if any */ - dbAdd(c->db,key,obj); + dictEntry *de = dbAdd(c->db,key,obj); + + /* If minExpiredField was set, then the object is hash with expiration + * on fields and need to register it in global HFE DS */ + if (obj->type == OBJ_HASH) { + uint64_t minExpiredField = hashTypeGetMinExpire(obj, 1); + if (minExpiredField != EB_EXPIRE_TIME_INVALID) + hashTypeAddToExpires(c->db, dictGetKey(de), obj, minExpiredField); + } + if (ttl) { setExpire(c,c->db,key,ttl); if (!absttl) { @@ -6670,7 +291,6 @@ void restoreCommand(client *c) { addReply(c,shared.ok); server.dirty++; } - /* MIGRATE socket cache implementation. 
* * We take a map between host:ip and a TCP socket that we used to connect @@ -6726,7 +346,7 @@ migrateCachedSocket* migrateGetSocket(client *c, robj *host, robj *port, long ti /* Create the connection */ conn = connCreate(connTypeOfCluster()); if (connBlockingConnect(conn, host->ptr, atoi(port->ptr), timeout) - != C_OK) { + != C_OK) { addReplyError(c,"-IOERR error or timeout connecting to the client"); connClose(conn); sdsfree(name); @@ -6833,8 +453,8 @@ void migrateCommand(client *c) { } else if (!strcasecmp(c->argv[j]->ptr,"keys")) { if (sdslen(c->argv[3]->ptr) != 0) { addReplyError(c, - "When using MIGRATE KEYS option, the key argument" - " must be set to the empty string"); + "When using MIGRATE KEYS option, the key argument" + " must be set to the empty string"); return; } first_key = j+1; @@ -6876,7 +496,7 @@ void migrateCommand(client *c) { return; } -try_again: + try_again: write_error = 0; /* Connect */ @@ -6895,10 +515,10 @@ void migrateCommand(client *c) { serverAssertWithInfo(c,NULL,rioWriteBulkString(&cmd,"AUTH",4)); if (username) { serverAssertWithInfo(c,NULL,rioWriteBulkString(&cmd,username, - sdslen(username))); + sdslen(username))); } serverAssertWithInfo(c,NULL,rioWriteBulkString(&cmd,password, - sdslen(password))); + sdslen(password))); } /* Send the SELECT command if the current DB is not already selected. */ @@ -6934,24 +554,24 @@ void migrateCommand(client *c) { kv[non_expired++] = kv[j]; serverAssertWithInfo(c,NULL, - rioWriteBulkCount(&cmd,'*',replace ? 5 : 4)); + rioWriteBulkCount(&cmd,'*',replace ? 
5 : 4)); if (server.cluster_enabled) serverAssertWithInfo(c,NULL, - rioWriteBulkString(&cmd,"RESTORE-ASKING",14)); + rioWriteBulkString(&cmd,"RESTORE-ASKING",14)); else serverAssertWithInfo(c,NULL,rioWriteBulkString(&cmd,"RESTORE",7)); serverAssertWithInfo(c,NULL,sdsEncodedObject(kv[j])); serverAssertWithInfo(c,NULL,rioWriteBulkString(&cmd,kv[j]->ptr, - sdslen(kv[j]->ptr))); + sdslen(kv[j]->ptr))); serverAssertWithInfo(c,NULL,rioWriteBulkLongLong(&cmd,ttl)); /* Emit the payload argument, that is the serialized object using * the DUMP format. */ createDumpPayload(&payload,ov[j],kv[j],dbid); serverAssertWithInfo(c,NULL, - rioWriteBulkString(&cmd,payload.io.buffer.ptr, - sdslen(payload.io.buffer.ptr))); + rioWriteBulkString(&cmd,payload.io.buffer.ptr, + sdslen(payload.io.buffer.ptr))); sdsfree(payload.io.buffer.ptr); /* Add the REPLACE option to the RESTORE command if it was specified @@ -7023,7 +643,7 @@ void migrateCommand(client *c) { error_from_target = 1; addReplyErrorFormat(c,"Target instance replied with error: %s", - errbuf+1); + errbuf+1); } } else { if (!copy) { @@ -7099,7 +719,7 @@ void migrateCommand(client *c) { /* On socket errors we try to close the cached socket and try again. * It is very common for the cached socket to get closed, if just reopening * it works it's a shame to notify the error to the caller. */ -socket_err: + socket_err: /* Cleanup we want to perform in both the retry and no retry case. * Note: Closing the migrate socket will also force SELECT next time. */ sdsfree(cmd.io.buffer.ptr); @@ -7112,58 +732,220 @@ void migrateCommand(client *c) { zfree(newargv); newargv = NULL; /* This will get reallocated on retry. */ - /* Retry only if it's not a timeout and we never attempted a retry - * (or the code jumping here did not set may_retry to zero). 
*/ - if (errno != ETIMEDOUT && may_retry) { - may_retry = 0; - goto try_again; + /* Retry only if it's not a timeout and we never attempted a retry + * (or the code jumping here did not set may_retry to zero). */ + if (errno != ETIMEDOUT && may_retry) { + may_retry = 0; + goto try_again; + } + + /* Cleanup we want to do if no retry is attempted. */ + zfree(ov); zfree(kv); + addReplyErrorSds(c, sdscatprintf(sdsempty(), + "-IOERR error or timeout %s to target instance", + write_error ? "writing" : "reading")); + return; +} + +/* Cluster node sanity check. Returns C_OK if the node id + * is valid an C_ERR otherwise. */ +int verifyClusterNodeId(const char *name, int length) { + if (length != CLUSTER_NAMELEN) return C_ERR; + for (int i = 0; i < length; i++) { + if (name[i] >= 'a' && name[i] <= 'z') continue; + if (name[i] >= '0' && name[i] <= '9') continue; + return C_ERR; + } + return C_OK; +} + +int isValidAuxChar(int c) { + return isalnum(c) || (strchr("!#$%&()*+:;<>?@[]^{|}~", c) == NULL); +} + +int isValidAuxString(char *s, unsigned int length) { + for (unsigned i = 0; i < length; i++) { + if (!isValidAuxChar(s[i])) return 0; } + return 1; +} - /* Cleanup we want to do if no retry is attempted. */ - zfree(ov); zfree(kv); - addReplyErrorSds(c, sdscatprintf(sdsempty(), - "-IOERR error or timeout %s to target instance", - write_error ? "writing" : "reading")); - return; +void clusterCommandMyId(client *c) { + char *name = clusterNodeGetName(getMyClusterNode()); + if (name) { + addReplyBulkCBuffer(c,name, CLUSTER_NAMELEN); + } else { + addReplyError(c, "No ID yet"); + } } -/* ----------------------------------------------------------------------------- - * Cluster functions related to serving / redirecting clients - * -------------------------------------------------------------------------- */ +char* getMyClusterId(void) { + return clusterNodeGetName(getMyClusterNode()); +} -/* The ASKING command is required after a -ASK redirection. 
- * The client should issue ASKING before to actually send the command to - * the target instance. See the Redis Cluster specification for more - * information. */ -void askingCommand(client *c) { - if (server.cluster_enabled == 0) { - addReplyError(c,"This instance has cluster support disabled"); - return; +void clusterCommandMyShardId(client *c) { + char *sid = clusterNodeGetShardId(getMyClusterNode()); + if (sid) { + addReplyBulkCBuffer(c,sid, CLUSTER_NAMELEN); + } else { + addReplyError(c, "No shard ID yet"); } - c->flags |= CLIENT_ASKING; - addReply(c,shared.ok); } -/* The READONLY command is used by clients to enter the read-only mode. - * In this mode slaves will not redirect clients as long as clients access - * with read-only commands to keys that are served by the slave's master. */ -void readonlyCommand(client *c) { +/* When a cluster command is called, we need to decide whether to return TLS info or + * non-TLS info by the client's connection type. However if the command is called by + * a Lua script or RM_call, there is no connection in the fake client, so we use + * server.current_client here to get the real client if available. And if it is not + * available (modules may call commands without a real client), we return the default + * info, which is determined by server.tls_cluster. 
*/ +static int shouldReturnTlsInfo(void) { + if (server.current_client && server.current_client->conn) { + return connIsTLS(server.current_client->conn); + } else { + return server.tls_cluster; + } +} + +unsigned int countKeysInSlot(unsigned int slot) { + return kvstoreDictSize(server.db->keys, slot); +} + +void clusterCommandHelp(client *c) { + const char *help[] = { + "COUNTKEYSINSLOT ", + " Return the number of keys in .", + "GETKEYSINSLOT ", + " Return key names stored by current node in a slot.", + "INFO", + " Return information about the cluster.", + "KEYSLOT ", + " Return the hash slot for .", + "MYID", + " Return the node id.", + "MYSHARDID", + " Return the node's shard id.", + "NODES", + " Return cluster configuration seen by node. Output format:", + " ...", + "REPLICAS ", + " Return replicas.", + "SLOTS", + " Return information about slots range mappings. Each range is made of:", + " start, end, master and replicas IP addresses, ports and ids", + "SHARDS", + " Return information about slot range mappings and the nodes associated with them.", + NULL + }; + + addExtendedReplyHelp(c, help, clusterCommandExtendedHelp()); +} + +void clusterCommand(client *c) { if (server.cluster_enabled == 0) { addReplyError(c,"This instance has cluster support disabled"); return; } - c->flags |= CLIENT_READONLY; - addReply(c,shared.ok); -} -/* The READWRITE command just clears the READONLY command state. */ -void readwriteCommand(client *c) { - if (server.cluster_enabled == 0) { - addReplyError(c,"This instance has cluster support disabled"); + if (c->argc == 2 && !strcasecmp(c->argv[1]->ptr,"help")) { + clusterCommandHelp(c); + } else if (!strcasecmp(c->argv[1]->ptr,"nodes") && c->argc == 2) { + /* CLUSTER NODES */ + /* Report TLS ports to TLS client, and report non-TLS port to non-TLS client. 
*/ + sds nodes = clusterGenNodesDescription(c, 0, shouldReturnTlsInfo()); + addReplyVerbatim(c,nodes,sdslen(nodes),"txt"); + sdsfree(nodes); + } else if (!strcasecmp(c->argv[1]->ptr,"myid") && c->argc == 2) { + /* CLUSTER MYID */ + clusterCommandMyId(c); + } else if (!strcasecmp(c->argv[1]->ptr,"myshardid") && c->argc == 2) { + /* CLUSTER MYSHARDID */ + clusterCommandMyShardId(c); + } else if (!strcasecmp(c->argv[1]->ptr,"slots") && c->argc == 2) { + /* CLUSTER SLOTS */ + clusterCommandSlots(c); + } else if (!strcasecmp(c->argv[1]->ptr,"shards") && c->argc == 2) { + /* CLUSTER SHARDS */ + clusterCommandShards(c); + } else if (!strcasecmp(c->argv[1]->ptr,"info") && c->argc == 2) { + /* CLUSTER INFO */ + + sds info = genClusterInfoString(); + + /* Produce the reply protocol. */ + addReplyVerbatim(c,info,sdslen(info),"txt"); + sdsfree(info); + } else if (!strcasecmp(c->argv[1]->ptr,"keyslot") && c->argc == 3) { + /* CLUSTER KEYSLOT */ + sds key = c->argv[2]->ptr; + + addReplyLongLong(c,keyHashSlot(key,sdslen(key))); + } else if (!strcasecmp(c->argv[1]->ptr,"countkeysinslot") && c->argc == 3) { + /* CLUSTER COUNTKEYSINSLOT */ + long long slot; + + if (getLongLongFromObjectOrReply(c,c->argv[2],&slot,NULL) != C_OK) + return; + if (slot < 0 || slot >= CLUSTER_SLOTS) { + addReplyError(c,"Invalid slot"); + return; + } + addReplyLongLong(c,countKeysInSlot(slot)); + } else if (!strcasecmp(c->argv[1]->ptr,"getkeysinslot") && c->argc == 4) { + /* CLUSTER GETKEYSINSLOT */ + long long maxkeys, slot; + + if (getLongLongFromObjectOrReply(c,c->argv[2],&slot,NULL) != C_OK) + return; + if (getLongLongFromObjectOrReply(c,c->argv[3],&maxkeys,NULL) + != C_OK) + return; + if (slot < 0 || slot >= CLUSTER_SLOTS || maxkeys < 0) { + addReplyError(c,"Invalid slot or number of keys"); + return; + } + + unsigned int keys_in_slot = countKeysInSlot(slot); + unsigned int numkeys = maxkeys > keys_in_slot ? 
keys_in_slot : maxkeys; + addReplyArrayLen(c,numkeys); + kvstoreDictIterator *kvs_di = NULL; + dictEntry *de = NULL; + kvs_di = kvstoreGetDictIterator(server.db->keys, slot); + for (unsigned int i = 0; i < numkeys; i++) { + de = kvstoreDictIteratorNext(kvs_di); + serverAssert(de != NULL); + sds sdskey = dictGetKey(de); + addReplyBulkCBuffer(c, sdskey, sdslen(sdskey)); + } + kvstoreReleaseDictIterator(kvs_di); + } else if ((!strcasecmp(c->argv[1]->ptr,"slaves") || + !strcasecmp(c->argv[1]->ptr,"replicas")) && c->argc == 3) { + /* CLUSTER SLAVES */ + /* CLUSTER REPLICAS */ + clusterNode *n = clusterLookupNode(c->argv[2]->ptr, sdslen(c->argv[2]->ptr)); + int j; + + /* Lookup the specified node in our table. */ + if (!n) { + addReplyErrorFormat(c,"Unknown node %s", (char*)c->argv[2]->ptr); + return; + } + + if (clusterNodeIsSlave(n)) { + addReplyError(c,"The specified node is not a master"); + return; + } + + /* Report TLS ports to TLS client, and report non-TLS port to non-TLS client. */ + addReplyArrayLen(c, clusterNodeNumSlaves(n)); + for (j = 0; j < clusterNodeNumSlaves(n); j++) { + sds ni = clusterGenNodeDescription(c, clusterNodeGetSlave(n, j), shouldReturnTlsInfo()); + addReplyBulkCString(c,ni); + sdsfree(ni); + } + } else if(!clusterCommandSpecial(c)) { + addReplySubcommandSyntaxError(c); return; } - c->flags &= ~CLIENT_READONLY; - addReply(c,shared.ok); } /* Return the pointer to the cluster node that is able to serve the command. @@ -7199,13 +981,15 @@ void readwriteCommand(client *c) { * CLUSTER_REDIR_DOWN_STATE and CLUSTER_REDIR_DOWN_RO_STATE if the cluster is * down but the user attempts to execute a command that addresses one or more keys. 
*/ clusterNode *getNodeByQuery(client *c, struct redisCommand *cmd, robj **argv, int argc, int *hashslot, int *error_code) { + clusterNode *myself = getMyClusterNode(); clusterNode *n = NULL; robj *firstkey = NULL; int multiple_keys = 0; multiState *ms, _ms; multiCmd mc; int i, slot = 0, migrating_slot = 0, importing_slot = 0, missing_keys = 0, - existing_keys = 0; + existing_keys = 0; + int pubsubshard_included = 0; /* Flag to indicate if a pubsub shard cmd is included. */ /* Allow any key to be set if a module disabled cluster redirections. */ if (server.cluster_module_flags & CLUSTER_MODULE_FLAG_NO_REDIRECTION) @@ -7237,10 +1021,6 @@ clusterNode *getNodeByQuery(client *c, struct redisCommand *cmd, robj **argv, in mc.cmd = cmd; } - int is_pubsubshard = cmd->proc == ssubscribeCommand || - cmd->proc == sunsubscribeCommand || - cmd->proc == spublishCommand; - /* Check that all the keys are in the same hash slot, and obtain this * slot and the node associated. */ for (i = 0; i < ms->count; i++) { @@ -7253,6 +1033,13 @@ clusterNode *getNodeByQuery(client *c, struct redisCommand *cmd, robj **argv, in margc = ms->commands[i].argc; margv = ms->commands[i].argv; + /* Only valid for sharded pubsub as regular pubsub can operate on any node and bypasses this layer. */ + if (!pubsubshard_included && + doesCommandHaveChannelsWithFlags(mcmd, CMD_CHANNEL_PUBLISH | CMD_CHANNEL_SUBSCRIBE)) + { + pubsubshard_included = 1; + } + getKeysResult result = GETKEYS_RESULT_INIT; numkeys = getKeysFromCommand(mcmd,margv,margc,&result); keyindex = result.keys; @@ -7267,7 +1054,7 @@ clusterNode *getNodeByQuery(client *c, struct redisCommand *cmd, robj **argv, in * and node. */ firstkey = thiskey; slot = thisslot; - n = server.cluster->slots[slot]; + n = getNodeBySlot(slot); /* Error: If a slot is not served, we are in "cluster down" * state. 
However the state is yet to be updated, so this was @@ -7286,10 +1073,10 @@ clusterNode *getNodeByQuery(client *c, struct redisCommand *cmd, robj **argv, in * error). To do so we set the importing/migrating state and * increment a counter for every missing key. */ if (n == myself && - server.cluster->migrating_slots_to[slot] != NULL) + getMigratingSlotDest(slot) != NULL) { migrating_slot = 1; - } else if (server.cluster->importing_slots_from[slot] != NULL) { + } else if (getImportingSlotSource(slot) != NULL) { importing_slot = 1; } } else { @@ -7300,7 +1087,7 @@ clusterNode *getNodeByQuery(client *c, struct redisCommand *cmd, robj **argv, in getKeysFreeResult(&result); if (error_code) *error_code = CLUSTER_REDIR_CROSS_SLOT; - return NULL; + return NULL; } if (importing_slot && !multiple_keys && !equalStringObjects(firstkey,thiskey)) { /* Flag this request as one with multiple different @@ -7316,7 +1103,7 @@ clusterNode *getNodeByQuery(client *c, struct redisCommand *cmd, robj **argv, in * node until the migration completes with CLUSTER SETSLOT * NODE . */ int flags = LOOKUP_NOTOUCH | LOOKUP_NOSTATS | LOOKUP_NONOTIFY | LOOKUP_NOEXPIRE; - if ((migrating_slot || importing_slot) && !is_pubsubshard) + if ((migrating_slot || importing_slot) && !pubsubshard_included) { if (lookupKeyReadWithFlags(&server.db[0], thiskey, flags) == NULL) missing_keys++; else existing_keys++; @@ -7332,8 +1119,8 @@ clusterNode *getNodeByQuery(client *c, struct redisCommand *cmd, robj **argv, in uint64_t cmd_flags = getCommandFlags(c); /* Cluster is globally down but we got keys? We only serve the request * if it is a read command and when allow_reads_when_down is enabled. 
*/ - if (server.cluster->state != CLUSTER_OK) { - if (is_pubsubshard) { + if (!isClusterHealthy()) { + if (pubsubshard_included) { if (!server.cluster_allow_pubsubshard_when_down) { if (error_code) *error_code = CLUSTER_REDIR_DOWN_STATE; return NULL; @@ -7372,7 +1159,7 @@ clusterNode *getNodeByQuery(client *c, struct redisCommand *cmd, robj **argv, in return NULL; } else { if (error_code) *error_code = CLUSTER_REDIR_ASK; - return server.cluster->migrating_slots_to[slot]; + return getMigratingSlotDest(slot); } } @@ -7396,15 +1183,15 @@ clusterNode *getNodeByQuery(client *c, struct redisCommand *cmd, robj **argv, in * is serving, we can reply without redirection. */ int is_write_command = (cmd_flags & CMD_WRITE) || (c->cmd->proc == execCommand && (c->mstate.cmd_flags & CMD_WRITE)); - if (((c->flags & CLIENT_READONLY) || is_pubsubshard) && + if (((c->flags & CLIENT_READONLY) || pubsubshard_included) && !is_write_command && - nodeIsSlave(myself) && - myself->slaveof == n) + clusterNodeIsSlave(myself) && + clusterNodeGetSlaveof(myself) == n) { return myself; } - /* Base case: just return the right node. However if this node is not + /* Base case: just return the right node. However, if this node is not * myself, set error_code to MOVED since we need to issue a redirection. */ if (n != myself && error_code) *error_code = CLUSTER_REDIR_MOVED; return n; @@ -7435,11 +1222,11 @@ void clusterRedirectClient(client *c, clusterNode *n, int hashslot, int error_co error_code == CLUSTER_REDIR_ASK) { /* Report TLS ports to TLS client, and report non-TLS port to non-TLS client. */ - int port = getNodeClientPort(n, connIsTLS(c->conn)); + int port = clusterNodeClientPort(n, shouldReturnTlsInfo()); addReplyErrorSds(c,sdscatprintf(sdsempty(), - "-%s %d %s:%d", - (error_code == CLUSTER_REDIR_ASK) ? "ASK" : "MOVED", - hashslot, getPreferredEndpoint(n), port)); + "-%s %d %s:%d", + (error_code == CLUSTER_REDIR_ASK) ? 
"ASK" : "MOVED", + hashslot, clusterNodePreferredEndpoint(n), port)); } else { serverPanic("getNodeByQuery() unknown error."); } @@ -7457,6 +1244,7 @@ void clusterRedirectClient(client *c, clusterNode *n, int hashslot, int error_co * longer handles, the client is sent a redirection error, and the function * returns 1. Otherwise 0 is returned and no operation is performed. */ int clusterRedirectBlockedClientIfNeeded(client *c) { + clusterNode *myself = getMyClusterNode(); if (c->flags & CLIENT_BLOCKED && (c->bstate.btype == BLOCKED_LIST || c->bstate.btype == BLOCKED_ZSET || @@ -7470,7 +1258,7 @@ int clusterRedirectBlockedClientIfNeeded(client *c) { * If the cluster is configured to allow reads on cluster down, we * still want to emit this error since a write will be required * to unblock them which may never come. */ - if (server.cluster->state == CLUSTER_FAIL) { + if (!isClusterHealthy()) { clusterRedirectClient(c,NULL,0,CLUSTER_REDIR_DOWN_STATE); return 1; } @@ -7485,13 +1273,13 @@ int clusterRedirectBlockedClientIfNeeded(client *c) { if ((de = dictNext(di)) != NULL) { robj *key = dictGetKey(de); int slot = keyHashSlot((char*)key->ptr, sdslen(key->ptr)); - clusterNode *node = server.cluster->slots[slot]; + clusterNode *node = getNodeBySlot(slot); /* if the client is read-only and attempting to access key that our * replica can handle, allow it. */ if ((c->flags & CLIENT_READONLY) && !(c->lastcmd->flags & CMD_WRITE) && - nodeIsSlave(myself) && myself->slaveof == node) + clusterNodeIsSlave(myself) && clusterNodeGetSlaveof(myself) == node) { node = myself; } @@ -7499,15 +1287,14 @@ int clusterRedirectBlockedClientIfNeeded(client *c) { /* We send an error and unblock the client if: * 1) The slot is unassigned, emitting a cluster down error. * 2) The slot is not handled by this node, nor being imported. 
*/ - if (node != myself && - server.cluster->importing_slots_from[slot] == NULL) + if (node != myself && getImportingSlotSource(slot) == NULL) { if (node == NULL) { clusterRedirectClient(c,NULL,0, - CLUSTER_REDIR_DOWN_UNBOUND); + CLUSTER_REDIR_DOWN_UNBOUND); } else { clusterRedirectClient(c,node,slot, - CLUSTER_REDIR_MOVED); + CLUSTER_REDIR_MOVED); } dictReleaseIterator(di); return 1; @@ -7518,160 +1305,169 @@ int clusterRedirectBlockedClientIfNeeded(client *c) { return 0; } -/* Slot to Key API. This is used by Redis Cluster in order to obtain in - * a fast way a key that belongs to a specified hash slot. This is useful - * while rehashing the cluster and in other conditions when we need to - * understand if we have keys for a given hash slot. */ - -void slotToKeyAddEntry(dictEntry *entry, redisDb *db) { - sds key = dictGetKey(entry); - unsigned int hashslot = keyHashSlot(key, sdslen(key)); - slotToKeys *slot_to_keys = &(*db->slots_to_keys).by_slot[hashslot]; - slot_to_keys->count++; - - /* Insert entry before the first element in the list. */ - dictEntry *first = slot_to_keys->head; - dictEntryNextInSlot(entry) = first; - if (first != NULL) { - serverAssert(dictEntryPrevInSlot(first) == NULL); - dictEntryPrevInSlot(first) = entry; +/* Returns an indication if the replica node is fully available + * and should be listed in CLUSTER SLOTS response. + * Returns 1 for available nodes, 0 for nodes that have + * not finished their initial sync, in failed state, or are + * otherwise considered not available to serve read commands. */ +static int isReplicaAvailable(clusterNode *node) { + if (clusterNodeIsFailing(node)) { + return 0; + } + long long repl_offset = clusterNodeReplOffset(node); + if (clusterNodeIsMyself(node)) { + /* Nodes do not update their own information + * in the cluster node list. 
*/ + repl_offset = replicationGetSlaveOffset(); } - serverAssert(dictEntryPrevInSlot(entry) == NULL); - slot_to_keys->head = entry; + return (repl_offset != 0); } -void slotToKeyDelEntry(dictEntry *entry, redisDb *db) { - sds key = dictGetKey(entry); - unsigned int hashslot = keyHashSlot(key, sdslen(key)); - slotToKeys *slot_to_keys = &(*db->slots_to_keys).by_slot[hashslot]; - slot_to_keys->count--; - - /* Connect previous and next entries to each other. */ - dictEntry *next = dictEntryNextInSlot(entry); - dictEntry *prev = dictEntryPrevInSlot(entry); - if (next != NULL) { - dictEntryPrevInSlot(next) = prev; - } - if (prev != NULL) { - dictEntryNextInSlot(prev) = next; +void addNodeToNodeReply(client *c, clusterNode *node) { + char* hostname = clusterNodeHostname(node); + addReplyArrayLen(c, 4); + if (server.cluster_preferred_endpoint_type == CLUSTER_ENDPOINT_TYPE_IP) { + addReplyBulkCString(c, clusterNodeIp(node)); + } else if (server.cluster_preferred_endpoint_type == CLUSTER_ENDPOINT_TYPE_HOSTNAME) { + if (hostname != NULL && hostname[0] != '\0') { + addReplyBulkCString(c, hostname); + } else { + addReplyBulkCString(c, "?"); + } + } else if (server.cluster_preferred_endpoint_type == CLUSTER_ENDPOINT_TYPE_UNKNOWN_ENDPOINT) { + addReplyNull(c); } else { - /* The removed entry was the first in the list. */ - serverAssert(slot_to_keys->head == entry); - slot_to_keys->head = next; + serverPanic("Unrecognized preferred endpoint type"); } -} -/* Updates neighbour entries when an entry has been replaced (e.g. reallocated - * during active defrag). */ -void slotToKeyReplaceEntry(dict *d, dictEntry *entry) { - dictEntry *next = dictEntryNextInSlot(entry); - dictEntry *prev = dictEntryPrevInSlot(entry); - if (next != NULL) { - dictEntryPrevInSlot(next) = entry; + /* Report TLS ports to TLS client, and report non-TLS port to non-TLS client. 
*/ + addReplyLongLong(c, clusterNodeClientPort(node, shouldReturnTlsInfo())); + addReplyBulkCBuffer(c, clusterNodeGetName(node), CLUSTER_NAMELEN); + + /* Add the additional endpoint information, this is all the known networking information + * that is not the preferred endpoint. Note the logic is evaluated twice so we can + * correctly report the number of additional network arguments without using a deferred + * map, an assertion is made at the end to check we set the right length. */ + int length = 0; + if (server.cluster_preferred_endpoint_type != CLUSTER_ENDPOINT_TYPE_IP) { + length++; } - if (prev != NULL) { - dictEntryNextInSlot(prev) = entry; - } else { - /* The replaced entry was the first in the list. */ - sds key = dictGetKey(entry); - unsigned int hashslot = keyHashSlot(key, sdslen(key)); - clusterDictMetadata *dictmeta = dictMetadata(d); - redisDb *db = dictmeta->db; - slotToKeys *slot_to_keys = &(*db->slots_to_keys).by_slot[hashslot]; - slot_to_keys->head = entry; + if (server.cluster_preferred_endpoint_type != CLUSTER_ENDPOINT_TYPE_HOSTNAME + && hostname != NULL && hostname[0] != '\0') + { + length++; } -} + addReplyMapLen(c, length); -/* Initialize slots-keys map of given db. */ -void slotToKeyInit(redisDb *db) { - db->slots_to_keys = zcalloc(sizeof(clusterSlotToKeyMapping)); - clusterDictMetadata *dictmeta = dictMetadata(db->dict); - dictmeta->db = db; + if (server.cluster_preferred_endpoint_type != CLUSTER_ENDPOINT_TYPE_IP) { + addReplyBulkCString(c, "ip"); + addReplyBulkCString(c, clusterNodeIp(node)); + length--; + } + if (server.cluster_preferred_endpoint_type != CLUSTER_ENDPOINT_TYPE_HOSTNAME + && hostname != NULL && hostname[0] != '\0') + { + addReplyBulkCString(c, "hostname"); + addReplyBulkCString(c, hostname); + length--; + } + serverAssert(length == 0); } -/* Empty slots-keys map of given db. 
*/ -void slotToKeyFlush(redisDb *db) { - memset(db->slots_to_keys, 0, - sizeof(clusterSlotToKeyMapping)); -} +void addNodeReplyForClusterSlot(client *c, clusterNode *node, int start_slot, int end_slot) { + int i, nested_elements = 3; /* slots (2) + master addr (1) */ + for (i = 0; i < clusterNodeNumSlaves(node); i++) { + if (!isReplicaAvailable(clusterNodeGetSlave(node, i))) continue; + nested_elements++; + } + addReplyArrayLen(c, nested_elements); + addReplyLongLong(c, start_slot); + addReplyLongLong(c, end_slot); + addNodeToNodeReply(c, node); -/* Free slots-keys map of given db. */ -void slotToKeyDestroy(redisDb *db) { - zfree(db->slots_to_keys); - db->slots_to_keys = NULL; + /* Remaining nodes in reply are replicas for slot range */ + for (i = 0; i < clusterNodeNumSlaves(node); i++) { + /* This loop is copy/pasted from clusterGenNodeDescription() + * with modifications for per-slot node aggregation. */ + if (!isReplicaAvailable(clusterNodeGetSlave(node, i))) continue; + addNodeToNodeReply(c, clusterNodeGetSlave(node, i)); + nested_elements--; + } + serverAssert(nested_elements == 3); /* Original 3 elements */ } -/* Remove all the keys in the specified hash slot. - * The number of removed items is returned. 
*/ -unsigned int delKeysInSlot(unsigned int hashslot) { - unsigned int j = 0; - - dictEntry *de = (*server.db->slots_to_keys).by_slot[hashslot].head; - while (de != NULL) { - sds sdskey = dictGetKey(de); - de = dictEntryNextInSlot(de); - robj *key = createStringObject(sdskey, sdslen(sdskey)); - dbDelete(&server.db[0], key); - propagateDeletion(&server.db[0], key, server.lazyfree_lazy_server_del); - signalModifiedKey(NULL, &server.db[0], key); - moduleNotifyKeyspaceEvent(NOTIFY_GENERIC, "del", key, server.db[0].id); - postExecutionUnitOperations(); - decrRefCount(key); - j++; - server.dirty++; - } +void clusterCommandSlots(client * c) { + /* Format: 1) 1) start slot + * 2) end slot + * 3) 1) master IP + * 2) master port + * 3) node ID + * 4) 1) replica IP + * 2) replica port + * 3) node ID + * ... continued until done + */ + clusterNode *n = NULL; + int num_masters = 0, start = -1; + void *slot_replylen = addReplyDeferredLen(c); - return j; -} + for (int i = 0; i <= CLUSTER_SLOTS; i++) { + /* Find start node and slot id. */ + if (n == NULL) { + if (i == CLUSTER_SLOTS) break; + n = getNodeBySlot(i); + start = i; + continue; + } -unsigned int countKeysInSlot(unsigned int hashslot) { - return (*server.db->slots_to_keys).by_slot[hashslot].count; + /* Add cluster slots info when occur different node with start + * or end of slot. */ + if (i == CLUSTER_SLOTS || n != getNodeBySlot(i)) { + addNodeReplyForClusterSlot(c, n, start, i-1); + num_masters++; + if (i == CLUSTER_SLOTS) break; + n = getNodeBySlot(i); + start = i; + } + } + setDeferredArrayLen(c, slot_replylen, num_masters); } /* ----------------------------------------------------------------------------- - * Operation(s) on channel rax tree. 
+ * Cluster functions related to serving / redirecting clients * -------------------------------------------------------------------------- */ -void slotToChannelUpdate(sds channel, int add) { - size_t keylen = sdslen(channel); - unsigned int hashslot = keyHashSlot(channel,keylen); - unsigned char buf[64]; - unsigned char *indexed = buf; - - if (keylen+2 > 64) indexed = zmalloc(keylen+2); - indexed[0] = (hashslot >> 8) & 0xff; - indexed[1] = hashslot & 0xff; - memcpy(indexed+2,channel,keylen); - if (add) { - raxInsert(server.cluster->slots_to_channels,indexed,keylen+2,NULL,NULL); - } else { - raxRemove(server.cluster->slots_to_channels,indexed,keylen+2,NULL); +/* The ASKING command is required after a -ASK redirection. + * The client should issue ASKING before to actually send the command to + * the target instance. See the Redis Cluster specification for more + * information. */ +void askingCommand(client *c) { + if (server.cluster_enabled == 0) { + addReplyError(c,"This instance has cluster support disabled"); + return; } - if (indexed != buf) zfree(indexed); -} - -void slotToChannelAdd(sds channel) { - slotToChannelUpdate(channel,1); + c->flags |= CLIENT_ASKING; + addReply(c,shared.ok); } -void slotToChannelDel(sds channel) { - slotToChannelUpdate(channel,0); +/* The READONLY command is used by clients to enter the read-only mode. + * In this mode slaves will not redirect clients as long as clients access + * with read-only commands to keys that are served by the slave's master. */ +void readonlyCommand(client *c) { + if (server.cluster_enabled == 0) { + addReplyError(c,"This instance has cluster support disabled"); + return; + } + c->flags |= CLIENT_READONLY; + addReply(c,shared.ok); } -/* Get the count of the channels for a given slot. 
*/ -unsigned int countChannelsInSlot(unsigned int hashslot) { - raxIterator iter; - int j = 0; - unsigned char indexed[2]; - - indexed[0] = (hashslot >> 8) & 0xff; - indexed[1] = hashslot & 0xff; - raxStart(&iter,server.cluster->slots_to_channels); - raxSeek(&iter,">=",indexed,2); - while(raxNext(&iter)) { - if (iter.key[0] != indexed[0] || iter.key[1] != indexed[1]) break; - j++; +/* The READWRITE command just clears the READONLY command state. */ +void readwriteCommand(client *c) { + if (server.cluster_enabled == 0) { + addReplyError(c,"This instance has cluster support disabled"); + return; } - raxStop(&iter); - return j; + c->flags &= ~CLIENT_READONLY; + addReply(c,shared.ok); } diff --git a/src/cluster.h b/src/cluster.h index 21c9c4499db..f21f1e9c16e 100644 --- a/src/cluster.h +++ b/src/cluster.h @@ -2,22 +2,15 @@ #define __CLUSTER_H /*----------------------------------------------------------------------------- - * Redis cluster data structures, defines, exported API. + * Redis cluster exported API. *----------------------------------------------------------------------------*/ -#define CLUSTER_SLOTS 16384 +#define CLUSTER_SLOT_MASK_BITS 14 /* Number of bits used for slot id. */ +#define CLUSTER_SLOTS (1<flags & CLUSTER_NODE_MASTER) -#define nodeIsSlave(n) ((n)->flags & CLUSTER_NODE_SLAVE) -#define nodeInHandshake(n) ((n)->flags & CLUSTER_NODE_HANDSHAKE) -#define nodeHasAddr(n) (!((n)->flags & CLUSTER_NODE_NOADDR)) -#define nodeWithoutAddr(n) ((n)->flags & CLUSTER_NODE_NOADDR) -#define nodeTimedOut(n) ((n)->flags & CLUSTER_NODE_PFAIL) -#define nodeFailed(n) ((n)->flags & CLUSTER_NODE_FAIL) -#define nodeCantFailover(n) ((n)->flags & CLUSTER_NODE_NOFAILOVER) - -/* Reasons why a slave is not able to failover. 
*/ -#define CLUSTER_CANT_FAILOVER_NONE 0 -#define CLUSTER_CANT_FAILOVER_DATA_AGE 1 -#define CLUSTER_CANT_FAILOVER_WAITING_DELAY 2 -#define CLUSTER_CANT_FAILOVER_EXPIRED 3 -#define CLUSTER_CANT_FAILOVER_WAITING_VOTES 4 -#define CLUSTER_CANT_FAILOVER_RELOG_PERIOD (10) /* seconds. */ - -/* clusterState todo_before_sleep flags. */ -#define CLUSTER_TODO_HANDLE_FAILOVER (1<<0) -#define CLUSTER_TODO_UPDATE_STATE (1<<1) -#define CLUSTER_TODO_SAVE_CONFIG (1<<2) -#define CLUSTER_TODO_FSYNC_CONFIG (1<<3) -#define CLUSTER_TODO_HANDLE_MANUALFAILOVER (1<<4) - -/* Message types. - * - * Note that the PING, PONG and MEET messages are actually the same exact - * kind of packet. PONG is the reply to ping, in the exact format as a PING, - * while MEET is a special PING that forces the receiver to add the sender - * as a node (if it is not already in the list). */ -#define CLUSTERMSG_TYPE_PING 0 /* Ping */ -#define CLUSTERMSG_TYPE_PONG 1 /* Pong (reply to Ping) */ -#define CLUSTERMSG_TYPE_MEET 2 /* Meet "let's join" message */ -#define CLUSTERMSG_TYPE_FAIL 3 /* Mark node xxx as failing */ -#define CLUSTERMSG_TYPE_PUBLISH 4 /* Pub/Sub Publish propagation */ -#define CLUSTERMSG_TYPE_FAILOVER_AUTH_REQUEST 5 /* May I failover? */ -#define CLUSTERMSG_TYPE_FAILOVER_AUTH_ACK 6 /* Yes, you have my vote */ -#define CLUSTERMSG_TYPE_UPDATE 7 /* Another node slots configuration */ -#define CLUSTERMSG_TYPE_MFSTART 8 /* Pause clients for manual failover */ -#define CLUSTERMSG_TYPE_MODULE 9 /* Module cluster API message. */ -#define CLUSTERMSG_TYPE_PUBLISHSHARD 10 /* Pub/Sub Publish shard propagation */ -#define CLUSTERMSG_TYPE_COUNT 11 /* Total number of message types. */ +typedef struct _clusterNode clusterNode; +struct clusterState; /* Flags that a module can set in order to prevent certain Redis Cluster * features to be enabled. 
Useful when implementing a different distributed @@ -108,339 +32,87 @@ typedef struct clusterLink { #define CLUSTER_MODULE_FLAG_NO_FAILOVER (1<<1) #define CLUSTER_MODULE_FLAG_NO_REDIRECTION (1<<2) -/* This structure represent elements of node->fail_reports. */ -typedef struct clusterNodeFailReport { - struct clusterNode *node; /* Node reporting the failure condition. */ - mstime_t time; /* Time of the last report from this node. */ -} clusterNodeFailReport; - -typedef struct clusterNode { - mstime_t ctime; /* Node object creation time. */ - char name[CLUSTER_NAMELEN]; /* Node name, hex string, sha1-size */ - char shard_id[CLUSTER_NAMELEN]; /* shard id, hex string, sha1-size */ - int flags; /* CLUSTER_NODE_... */ - uint64_t configEpoch; /* Last configEpoch observed for this node */ - unsigned char slots[CLUSTER_SLOTS/8]; /* slots handled by this node */ - uint16_t *slot_info_pairs; /* Slots info represented as (start/end) pair (consecutive index). */ - int slot_info_pairs_count; /* Used number of slots in slot_info_pairs */ - int numslots; /* Number of slots handled by this node */ - int numslaves; /* Number of slave nodes, if this is a master */ - struct clusterNode **slaves; /* pointers to slave nodes */ - struct clusterNode *slaveof; /* pointer to the master node. Note that it - may be NULL even if the node is a slave - if we don't have the master node in our - tables. 
*/ - unsigned long long last_in_ping_gossip; /* The number of the last carried in the ping gossip section */ - mstime_t ping_sent; /* Unix time we sent latest ping */ - mstime_t pong_received; /* Unix time we received the pong */ - mstime_t data_received; /* Unix time we received any data */ - mstime_t fail_time; /* Unix time when FAIL flag was set */ - mstime_t voted_time; /* Last time we voted for a slave of this master */ - mstime_t repl_offset_time; /* Unix time we received offset for this node */ - mstime_t orphaned_time; /* Starting time of orphaned master condition */ - long long repl_offset; /* Last known repl offset for this node. */ - char ip[NET_IP_STR_LEN]; /* Latest known IP address of this node */ - sds hostname; /* The known hostname for this node */ - sds human_nodename; /* The known human readable nodename for this node */ - int tcp_port; /* Latest known clients TCP port. */ - int tls_port; /* Latest known clients TLS port */ - int cport; /* Latest known cluster port of this node. */ - clusterLink *link; /* TCP/IP link established toward this node */ - clusterLink *inbound_link; /* TCP/IP link accepted from this node */ - list *fail_reports; /* List of nodes signaling this as failing */ -} clusterNode; - -/* Slot to keys for a single slot. The keys in the same slot are linked together - * using dictEntry metadata. */ -typedef struct slotToKeys { - uint64_t count; /* Number of keys in the slot. */ - dictEntry *head; /* The first key-value entry in the slot. */ -} slotToKeys; - -/* Slot to keys mapping for all slots, opaque outside this file. */ -struct clusterSlotToKeyMapping { - slotToKeys by_slot[CLUSTER_SLOTS]; -}; - -/* Dict entry metadata for cluster mode, used for the Slot to Key API to form a - * linked list of the entries belonging to the same slot. 
*/ -typedef struct clusterDictEntryMetadata { - dictEntry *prev; /* Prev entry with key in the same slot */ - dictEntry *next; /* Next entry with key in the same slot */ -} clusterDictEntryMetadata; - -typedef struct { - redisDb *db; /* A link back to the db this dict belongs to */ -} clusterDictMetadata; - -typedef struct clusterState { - clusterNode *myself; /* This node */ - uint64_t currentEpoch; - int state; /* CLUSTER_OK, CLUSTER_FAIL, ... */ - int size; /* Num of master nodes with at least one slot */ - dict *nodes; /* Hash table of name -> clusterNode structures */ - dict *shards; /* Hash table of shard_id -> list (of nodes) structures */ - dict *nodes_black_list; /* Nodes we don't re-add for a few seconds. */ - clusterNode *migrating_slots_to[CLUSTER_SLOTS]; - clusterNode *importing_slots_from[CLUSTER_SLOTS]; - clusterNode *slots[CLUSTER_SLOTS]; - rax *slots_to_channels; - /* The following fields are used to take the slave state on elections. */ - mstime_t failover_auth_time; /* Time of previous or next election. */ - int failover_auth_count; /* Number of votes received so far. */ - int failover_auth_sent; /* True if we already asked for votes. */ - int failover_auth_rank; /* This slave rank for current auth request. */ - uint64_t failover_auth_epoch; /* Epoch of the current election. */ - int cant_failover_reason; /* Why a slave is currently not able to - failover. See the CANT_FAILOVER_* macros. */ - /* Manual failover state in common. */ - mstime_t mf_end; /* Manual failover time limit (ms unixtime). - It is zero if there is no MF in progress. */ - /* Manual failover state of master. */ - clusterNode *mf_slave; /* Slave performing the manual failover. */ - /* Manual failover state of slave. */ - long long mf_master_offset; /* Master offset the slave needs to start MF - or -1 if still not received. */ - int mf_can_start; /* If non-zero signal that the manual failover - can start requesting masters vote. 
*/ - /* The following fields are used by masters to take state on elections. */ - uint64_t lastVoteEpoch; /* Epoch of the last vote granted. */ - int todo_before_sleep; /* Things to do in clusterBeforeSleep(). */ - /* Stats */ - /* Messages received and sent by type. */ - long long stats_bus_messages_sent[CLUSTERMSG_TYPE_COUNT]; - long long stats_bus_messages_received[CLUSTERMSG_TYPE_COUNT]; - long long stats_pfail_nodes; /* Number of nodes in PFAIL status, - excluding nodes without address. */ - unsigned long long stat_cluster_links_buffer_limit_exceeded; /* Total number of cluster links freed due to exceeding buffer limit */ - - /* Bit map for slots that are no longer claimed by the owner in cluster PING - * messages. During slot migration, the owner will stop claiming the slot after - * the ownership transfer. Set the bit corresponding to the slot when a node - * stops claiming the slot. This prevents spreading incorrect information (that - * source still owns the slot) using UPDATE messages. */ - unsigned char owner_not_claiming_slot[CLUSTER_SLOTS / 8]; -} clusterState; - -/* Redis cluster messages header */ - -/* Initially we don't know our "name", but we'll find it once we connect - * to the first node, using the getsockname() function. Then we'll use this - * address for all the next messages. */ -typedef struct { - char nodename[CLUSTER_NAMELEN]; - uint32_t ping_sent; - uint32_t pong_received; - char ip[NET_IP_STR_LEN]; /* IP address last time it was seen */ - uint16_t port; /* primary port last time it was seen */ - uint16_t cport; /* cluster port last time it was seen */ - uint16_t flags; /* node->flags copy */ - uint16_t pport; /* secondary port last time it was seen */ - uint16_t notused1; -} clusterMsgDataGossip; - -typedef struct { - char nodename[CLUSTER_NAMELEN]; -} clusterMsgDataFail; - -typedef struct { - uint32_t channel_len; - uint32_t message_len; - unsigned char bulk_data[8]; /* 8 bytes just as placeholder. 
*/ -} clusterMsgDataPublish; - -typedef struct { - uint64_t configEpoch; /* Config epoch of the specified instance. */ - char nodename[CLUSTER_NAMELEN]; /* Name of the slots owner. */ - unsigned char slots[CLUSTER_SLOTS/8]; /* Slots bitmap. */ -} clusterMsgDataUpdate; - -typedef struct { - uint64_t module_id; /* ID of the sender module. */ - uint32_t len; /* ID of the sender module. */ - uint8_t type; /* Type from 0 to 255. */ - unsigned char bulk_data[3]; /* 3 bytes just as placeholder. */ -} clusterMsgModule; - -/* The cluster supports optional extension messages that can be sent - * along with ping/pong/meet messages to give additional info in a - * consistent manner. */ -typedef enum { - CLUSTERMSG_EXT_TYPE_HOSTNAME, - CLUSTERMSG_EXT_TYPE_HUMAN_NODENAME, - CLUSTERMSG_EXT_TYPE_FORGOTTEN_NODE, - CLUSTERMSG_EXT_TYPE_SHARDID, -} clusterMsgPingtypes; - -/* Helper function for making sure extensions are eight byte aligned. */ -#define EIGHT_BYTE_ALIGN(size) ((((size) + 7) / 8) * 8) - -typedef struct { - char hostname[1]; /* The announced hostname, ends with \0. */ -} clusterMsgPingExtHostname; - -typedef struct { - char human_nodename[1]; /* The announced nodename, ends with \0. */ -} clusterMsgPingExtHumanNodename; - -typedef struct { - char name[CLUSTER_NAMELEN]; /* Node name. */ - uint64_t ttl; /* Remaining time to blacklist the node, in seconds. */ -} clusterMsgPingExtForgottenNode; - -static_assert(sizeof(clusterMsgPingExtForgottenNode) % 8 == 0, ""); - -typedef struct { - char shard_id[CLUSTER_NAMELEN]; /* The shard_id, 40 bytes fixed. */ -} clusterMsgPingExtShardId; - -typedef struct { - uint32_t length; /* Total length of this extension message (including this header) */ - uint16_t type; /* Type of this extension message (see clusterMsgPingExtTypes) */ - uint16_t unused; /* 16 bits of padding to make this structure 8 byte aligned. 
*/ - union { - clusterMsgPingExtHostname hostname; - clusterMsgPingExtHumanNodename human_nodename; - clusterMsgPingExtForgottenNode forgotten_node; - clusterMsgPingExtShardId shard_id; - } ext[]; /* Actual extension information, formatted so that the data is 8 - * byte aligned, regardless of its content. */ -} clusterMsgPingExt; - -union clusterMsgData { - /* PING, MEET and PONG */ - struct { - /* Array of N clusterMsgDataGossip structures */ - clusterMsgDataGossip gossip[1]; - /* Extension data that can optionally be sent for ping/meet/pong - * messages. We can't explicitly define them here though, since - * the gossip array isn't the real length of the gossip data. */ - } ping; - - /* FAIL */ - struct { - clusterMsgDataFail about; - } fail; - - /* PUBLISH */ - struct { - clusterMsgDataPublish msg; - } publish; - - /* UPDATE */ - struct { - clusterMsgDataUpdate nodecfg; - } update; - - /* MODULE */ - struct { - clusterMsgModule msg; - } module; -}; - -#define CLUSTER_PROTO_VER 1 /* Cluster bus protocol version. */ - -typedef struct { - char sig[4]; /* Signature "RCmb" (Redis Cluster message bus). */ - uint32_t totlen; /* Total length of this message */ - uint16_t ver; /* Protocol version, currently set to 1. */ - uint16_t port; /* Primary port number (TCP or TLS). */ - uint16_t type; /* Message type */ - uint16_t count; /* Only used for some kind of messages. */ - uint64_t currentEpoch; /* The epoch accordingly to the sending node. */ - uint64_t configEpoch; /* The config epoch if it's a master, or the last - epoch advertised by its master if it is a - slave. */ - uint64_t offset; /* Master replication offset if node is a master or - processed replication offset if node is a slave. */ - char sender[CLUSTER_NAMELEN]; /* Name of the sender node */ - unsigned char myslots[CLUSTER_SLOTS/8]; - char slaveof[CLUSTER_NAMELEN]; - char myip[NET_IP_STR_LEN]; /* Sender IP, if not all zeroed. */ - uint16_t extensions; /* Number of extensions sent along with this packet. 
*/ - char notused1[30]; /* 30 bytes reserved for future usage. */ - uint16_t pport; /* Secondary port number: if primary port is TCP port, this is - TLS port, and if primary port is TLS port, this is TCP port.*/ - uint16_t cport; /* Sender TCP cluster bus port */ - uint16_t flags; /* Sender node flags */ - unsigned char state; /* Cluster state from the POV of the sender */ - unsigned char mflags[3]; /* Message flags: CLUSTERMSG_FLAG[012]_... */ - union clusterMsgData data; -} clusterMsg; - -/* clusterMsg defines the gossip wire protocol exchanged among Redis cluster - * members, which can be running different versions of redis-server bits, - * especially during cluster rolling upgrades. - * - * Therefore, fields in this struct should remain at the same offset from - * release to release. The static asserts below ensures that incompatible - * changes in clusterMsg be caught at compile time. - */ - -static_assert(offsetof(clusterMsg, sig) == 0, "unexpected field offset"); -static_assert(offsetof(clusterMsg, totlen) == 4, "unexpected field offset"); -static_assert(offsetof(clusterMsg, ver) == 8, "unexpected field offset"); -static_assert(offsetof(clusterMsg, port) == 10, "unexpected field offset"); -static_assert(offsetof(clusterMsg, type) == 12, "unexpected field offset"); -static_assert(offsetof(clusterMsg, count) == 14, "unexpected field offset"); -static_assert(offsetof(clusterMsg, currentEpoch) == 16, "unexpected field offset"); -static_assert(offsetof(clusterMsg, configEpoch) == 24, "unexpected field offset"); -static_assert(offsetof(clusterMsg, offset) == 32, "unexpected field offset"); -static_assert(offsetof(clusterMsg, sender) == 40, "unexpected field offset"); -static_assert(offsetof(clusterMsg, myslots) == 80, "unexpected field offset"); -static_assert(offsetof(clusterMsg, slaveof) == 2128, "unexpected field offset"); -static_assert(offsetof(clusterMsg, myip) == 2168, "unexpected field offset"); -static_assert(offsetof(clusterMsg, extensions) == 2214, 
"unexpected field offset"); -static_assert(offsetof(clusterMsg, notused1) == 2216, "unexpected field offset"); -static_assert(offsetof(clusterMsg, pport) == 2246, "unexpected field offset"); -static_assert(offsetof(clusterMsg, cport) == 2248, "unexpected field offset"); -static_assert(offsetof(clusterMsg, flags) == 2250, "unexpected field offset"); -static_assert(offsetof(clusterMsg, state) == 2252, "unexpected field offset"); -static_assert(offsetof(clusterMsg, mflags) == 2253, "unexpected field offset"); -static_assert(offsetof(clusterMsg, data) == 2256, "unexpected field offset"); - -#define CLUSTERMSG_MIN_LEN (sizeof(clusterMsg)-sizeof(union clusterMsgData)) - -/* Message flags better specify the packet content or are used to - * provide some information about the node state. */ -#define CLUSTERMSG_FLAG0_PAUSED (1<<0) /* Master paused for manual failover. */ -#define CLUSTERMSG_FLAG0_FORCEACK (1<<1) /* Give ACK to AUTH_REQUEST even if - master is up. */ -#define CLUSTERMSG_FLAG0_EXT_DATA (1<<2) /* Message contains extension data */ - /* ---------------------- API exported outside cluster.c -------------------- */ +/* functions requiring mechanism specific implementations */ void clusterInit(void); -void clusterInitListeners(void); +void clusterInitLast(void); void clusterCron(void); void clusterBeforeSleep(void); -clusterNode *getNodeByQuery(client *c, struct redisCommand *cmd, robj **argv, int argc, int *hashslot, int *ask); -int verifyClusterNodeId(const char *name, int length); -clusterNode *clusterLookupNode(const char *name, int length); -int clusterRedirectBlockedClientIfNeeded(client *c); -void clusterRedirectClient(client *c, clusterNode *n, int hashslot, int error_code); -void migrateCloseTimedoutSockets(void); int verifyClusterConfigWithData(void); -unsigned long getClusterConnectionsCount(void); + int clusterSendModuleMessageToTarget(const char *target, uint64_t module_id, uint8_t type, const char *payload, uint32_t len); -void 
clusterPropagatePublish(robj *channel, robj *message, int sharded); -unsigned int keyHashSlot(char *key, int keylen); -void slotToKeyAddEntry(dictEntry *entry, redisDb *db); -void slotToKeyDelEntry(dictEntry *entry, redisDb *db); -void slotToKeyReplaceEntry(dict *d, dictEntry *entry); -void slotToKeyInit(redisDb *db); -void slotToKeyFlush(redisDb *db); -void slotToKeyDestroy(redisDb *db); + void clusterUpdateMyselfFlags(void); void clusterUpdateMyselfIp(void); -void slotToChannelAdd(sds channel); -void slotToChannelDel(sds channel); void clusterUpdateMyselfHostname(void); void clusterUpdateMyselfAnnouncedPorts(void); +void clusterUpdateMyselfHumanNodename(void); + +void clusterPropagatePublish(robj *channel, robj *message, int sharded); + +unsigned long getClusterConnectionsCount(void); +int isClusterHealthy(void); + sds clusterGenNodesDescription(client *c, int filter, int tls_primary); sds genClusterInfoString(void); -void freeClusterLink(clusterLink *link); -void clusterUpdateMyselfHumanNodename(void); -int isValidAuxString(char *s, unsigned int length); +/* handle implementation specific debug cluster commands. Return 1 if handled, 0 otherwise. */ +int handleDebugClusterCommand(client *c); +const char **clusterDebugCommandExtendedHelp(void); +/* handle implementation specific cluster commands. Return 1 if handled, 0 otherwise. 
*/ +int clusterCommandSpecial(client *c); +const char** clusterCommandExtendedHelp(void); + +int clusterAllowFailoverCmd(client *c); +void clusterPromoteSelfToMaster(void); +int clusterManualFailoverTimeLimit(void); + +void clusterCommandSlots(client * c); +void clusterCommandMyId(client *c); +void clusterCommandMyShardId(client *c); +void clusterCommandShards(client *c); +sds clusterGenNodeDescription(client *c, clusterNode *node, int tls_primary); + +int clusterNodeCoversSlot(clusterNode *n, int slot); int getNodeDefaultClientPort(clusterNode *n); +int clusterNodeIsMyself(clusterNode *n); +clusterNode *getMyClusterNode(void); +char *getMyClusterId(void); +int getClusterSize(void); +int getMyShardSlotCount(void); +int handleDebugClusterCommand(client *c); +int clusterNodePending(clusterNode *node); +int clusterNodeIsMaster(clusterNode *n); +char **getClusterNodesList(size_t *numnodes); +int clusterNodeIsMaster(clusterNode *n); +char *clusterNodeIp(clusterNode *node); +int clusterNodeIsSlave(clusterNode *node); +clusterNode *clusterNodeGetSlaveof(clusterNode *node); +clusterNode *clusterNodeGetMaster(clusterNode *node); +char *clusterNodeGetName(clusterNode *node); +int clusterNodeTimedOut(clusterNode *node); +int clusterNodeIsFailing(clusterNode *node); +int clusterNodeIsNoFailover(clusterNode *node); +char *clusterNodeGetShardId(clusterNode *node); +int clusterNodeNumSlaves(clusterNode *node); +clusterNode *clusterNodeGetSlave(clusterNode *node, int slave_idx); +clusterNode *getMigratingSlotDest(int slot); +clusterNode *getImportingSlotSource(int slot); +clusterNode *getNodeBySlot(int slot); +int clusterNodeClientPort(clusterNode *n, int use_tls); +char *clusterNodeHostname(clusterNode *node); +const char *clusterNodePreferredEndpoint(clusterNode *n); +long long clusterNodeReplOffset(clusterNode *node); +clusterNode *clusterLookupNode(const char *name, int length); +/* functions with shared implementations */ +clusterNode *getNodeByQuery(client *c, struct 
redisCommand *cmd, robj **argv, int argc, int *hashslot, int *ask); +int clusterRedirectBlockedClientIfNeeded(client *c); +void clusterRedirectClient(client *c, clusterNode *n, int hashslot, int error_code); +void migrateCloseTimedoutSockets(void); +unsigned int keyHashSlot(char *key, int keylen); +int patternHashSlot(char *pattern, int length); +int isValidAuxString(char *s, unsigned int length); +void migrateCommand(client *c); +void clusterCommand(client *c); +ConnectionType *connTypeOfCluster(void); #endif /* __CLUSTER_H */ diff --git a/src/cluster_legacy.c b/src/cluster_legacy.c new file mode 100644 index 00000000000..658b4f3b03b --- /dev/null +++ b/src/cluster_legacy.c @@ -0,0 +1,6498 @@ +/* + * Copyright (c) 2009-Present, Redis Ltd. + * All rights reserved. + * + * Licensed under your choice of the Redis Source Available License 2.0 + * (RSALv2) or the Server Side Public License v1 (SSPLv1). + */ + +/* + * cluster_legacy.c contains the implementation of the cluster API that is + * specific to the standard, Redis cluster-bus based clustering mechanism. + */ + +#include "server.h" +#include "cluster.h" +#include "cluster_legacy.h" +#include "endianconv.h" +#include "connection.h" + +#include +#include +#include +#include +#include +#include +#include +#include + +/* A global reference to myself is handy to make code more clear. + * Myself always points to server.cluster->myself, that is, the clusterNode + * that represents this node. 
*/ +clusterNode *myself = NULL; + +clusterNode *createClusterNode(char *nodename, int flags); +void clusterAddNode(clusterNode *node); +void clusterAcceptHandler(aeEventLoop *el, int fd, void *privdata, int mask); +void clusterReadHandler(connection *conn); +void clusterSendPing(clusterLink *link, int type); +void clusterSendFail(char *nodename); +void clusterSendFailoverAuthIfNeeded(clusterNode *node, clusterMsg *request); +void clusterUpdateState(void); +int clusterNodeCoversSlot(clusterNode *n, int slot); +list *clusterGetNodesInMyShard(clusterNode *node); +int clusterNodeAddSlave(clusterNode *master, clusterNode *slave); +int clusterAddSlot(clusterNode *n, int slot); +int clusterDelSlot(int slot); +int clusterMoveNodeSlots(clusterNode *from_node, clusterNode *to_node); +int clusterDelNodeSlots(clusterNode *node); +int clusterNodeSetSlotBit(clusterNode *n, int slot); +void clusterSetMaster(clusterNode *n); +void clusterHandleSlaveFailover(void); +void clusterHandleSlaveMigration(int max_slaves); +int bitmapTestBit(unsigned char *bitmap, int pos); +void bitmapSetBit(unsigned char *bitmap, int pos); +void bitmapClearBit(unsigned char *bitmap, int pos); +void clusterDoBeforeSleep(int flags); +void clusterSendUpdate(clusterLink *link, clusterNode *node); +void resetManualFailover(void); +void clusterCloseAllSlots(void); +void clusterSetNodeAsMaster(clusterNode *n); +void clusterDelNode(clusterNode *delnode); +sds representClusterNodeFlags(sds ci, uint16_t flags); +sds representSlotInfo(sds ci, uint16_t *slot_info_pairs, int slot_info_pairs_count); +void clusterFreeNodesSlotsInfo(clusterNode *n); +uint64_t clusterGetMaxEpoch(void); +int clusterBumpConfigEpochWithoutConsensus(void); +void moduleCallClusterReceivers(const char *sender_id, uint64_t module_id, uint8_t type, const unsigned char *payload, uint32_t len); +const char *clusterGetMessageTypeString(int type); +void removeChannelsInSlot(unsigned int slot); +unsigned int countKeysInSlot(unsigned int hashslot); 
+unsigned int countChannelsInSlot(unsigned int hashslot); +unsigned int delKeysInSlot(unsigned int hashslot); +void clusterAddNodeToShard(const char *shard_id, clusterNode *node); +list *clusterLookupNodeListByShardId(const char *shard_id); +void clusterRemoveNodeFromShard(clusterNode *node); +int auxShardIdSetter(clusterNode *n, void *value, int length); +sds auxShardIdGetter(clusterNode *n, sds s); +int auxShardIdPresent(clusterNode *n); +int auxHumanNodenameSetter(clusterNode *n, void *value, int length); +sds auxHumanNodenameGetter(clusterNode *n, sds s); +int auxHumanNodenamePresent(clusterNode *n); +int auxTcpPortSetter(clusterNode *n, void *value, int length); +sds auxTcpPortGetter(clusterNode *n, sds s); +int auxTcpPortPresent(clusterNode *n); +int auxTlsPortSetter(clusterNode *n, void *value, int length); +sds auxTlsPortGetter(clusterNode *n, sds s); +int auxTlsPortPresent(clusterNode *n); +static void clusterBuildMessageHdr(clusterMsg *hdr, int type, size_t msglen); +void freeClusterLink(clusterLink *link); +int verifyClusterNodeId(const char *name, int length); + +int getNodeDefaultClientPort(clusterNode *n) { + return server.tls_cluster ? n->tls_port : n->tcp_port; +} + +static inline int getNodeDefaultReplicationPort(clusterNode *n) { + return server.tls_replication ? n->tls_port : n->tcp_port; +} + +int clusterNodeClientPort(clusterNode *n, int use_tls) { + return use_tls ? n->tls_port : n->tcp_port; +} + +static inline int defaultClientPort(void) { + return server.tls_cluster ? server.tls_port : server.port; +} + +#define isSlotUnclaimed(slot) \ + (server.cluster->slots[slot] == NULL || \ + bitmapTestBit(server.cluster->owner_not_claiming_slot, slot)) + +#define RCVBUF_INIT_LEN 1024 +#define RCVBUF_MAX_PREALLOC (1<<20) /* 1MB */ + +/* Cluster nodes hash table, mapping nodes addresses 1.2.3.4:6379 to + * clusterNode structures. 
*/ +dictType clusterNodesDictType = { + dictSdsHash, /* hash function */ + NULL, /* key dup */ + NULL, /* val dup */ + dictSdsKeyCompare, /* key compare */ + dictSdsDestructor, /* key destructor */ + NULL, /* val destructor */ + NULL /* allow to expand */ +}; + +/* Cluster re-addition blacklist. This maps node IDs to the time + * we can re-add this node. The goal is to avoid reading a removed + * node for some time. */ +dictType clusterNodesBlackListDictType = { + dictSdsCaseHash, /* hash function */ + NULL, /* key dup */ + NULL, /* val dup */ + dictSdsKeyCaseCompare, /* key compare */ + dictSdsDestructor, /* key destructor */ + NULL, /* val destructor */ + NULL /* allow to expand */ +}; + +/* Cluster shards hash table, mapping shard id to list of nodes */ +dictType clusterSdsToListType = { + dictSdsHash, /* hash function */ + NULL, /* key dup */ + NULL, /* val dup */ + dictSdsKeyCompare, /* key compare */ + dictSdsDestructor, /* key destructor */ + dictListDestructor, /* val destructor */ + NULL /* allow to expand */ +}; + +/* Aux fields are introduced in Redis 7.2 to support the persistence + * of various important node properties, such as shard id, in nodes.conf. + * Aux fields take an explicit format of name=value pairs and have no + * intrinsic order among them. Aux fields are always grouped together + * at the end of the second column of each row after the node's IP + * address/port/cluster_port and the optional hostname. Aux fields + * are separated by ','. 
*/ + +/* Aux field setter function prototype + * return C_OK when the update is successful; C_ERR otherwise */ +typedef int (aux_value_setter) (clusterNode* n, void *value, int length); +/* Aux field getter function prototype + * return an sds that is a concatenation of the input sds string and + * the aux value */ +typedef sds (aux_value_getter) (clusterNode* n, sds s); + +typedef int (aux_value_present) (clusterNode* n); + +typedef struct { + char *field; + aux_value_setter *setter; + aux_value_getter *getter; + aux_value_present *isPresent; +} auxFieldHandler; + +/* Assign index to each aux field */ +typedef enum { + af_shard_id, + af_human_nodename, + af_tcp_port, + af_tls_port, + af_count, +} auxFieldIndex; + +/* Note that + * 1. the order of the elements below must match that of their + * indices as defined in auxFieldIndex + * 2. aux name can contain characters that pass the isValidAuxChar check only */ +auxFieldHandler auxFieldHandlers[] = { + {"shard-id", auxShardIdSetter, auxShardIdGetter, auxShardIdPresent}, + {"nodename", auxHumanNodenameSetter, auxHumanNodenameGetter, auxHumanNodenamePresent}, + {"tcp-port", auxTcpPortSetter, auxTcpPortGetter, auxTcpPortPresent}, + {"tls-port", auxTlsPortSetter, auxTlsPortGetter, auxTlsPortPresent}, +}; + +int auxShardIdSetter(clusterNode *n, void *value, int length) { + if (verifyClusterNodeId(value, length) == C_ERR) { + return C_ERR; + } + memcpy(n->shard_id, value, CLUSTER_NAMELEN); + /* if n already has replicas, make sure they all agree + * on the shard id */ + for (int i = 0; i < n->numslaves; i++) { + if (memcmp(n->slaves[i]->shard_id, n->shard_id, CLUSTER_NAMELEN) != 0) { + return C_ERR; + } + } + clusterAddNodeToShard(value, n); + return C_OK; +} + +sds auxShardIdGetter(clusterNode *n, sds s) { + return sdscatprintf(s, "%.40s", n->shard_id); +} + +int auxShardIdPresent(clusterNode *n) { + return strlen(n->shard_id); +} + +int auxHumanNodenameSetter(clusterNode *n, void *value, int length) { + if (n && 
!strncmp(value, n->human_nodename, length)) { + return C_OK; + } else if (!n && (length == 0)) { + return C_OK; + } + if (n) { + n->human_nodename = sdscpylen(n->human_nodename, value, length); + } else if (sdslen(n->human_nodename) != 0) { + sdsclear(n->human_nodename); + } else { + return C_ERR; + } + return C_OK; +} + +sds auxHumanNodenameGetter(clusterNode *n, sds s) { + return sdscatprintf(s, "%s", n->human_nodename); +} + +int auxHumanNodenamePresent(clusterNode *n) { + return sdslen(n->human_nodename); +} + +int auxTcpPortSetter(clusterNode *n, void *value, int length) { + if (length > 5 || length < 1) { + return C_ERR; + } + char buf[length + 1]; + memcpy(buf, (char*)value, length); + buf[length] = '\0'; + n->tcp_port = atoi(buf); + return (n->tcp_port < 0 || n->tcp_port >= 65536) ? C_ERR : C_OK; +} + +sds auxTcpPortGetter(clusterNode *n, sds s) { + return sdscatprintf(s, "%d", n->tcp_port); +} + +int auxTcpPortPresent(clusterNode *n) { + return n->tcp_port >= 0 && n->tcp_port < 65536; +} + +int auxTlsPortSetter(clusterNode *n, void *value, int length) { + if (length > 5 || length < 1) { + return C_ERR; + } + char buf[length + 1]; + memcpy(buf, (char*)value, length); + buf[length] = '\0'; + n->tls_port = atoi(buf); + return (n->tls_port < 0 || n->tls_port >= 65536) ? C_ERR : C_OK; +} + +sds auxTlsPortGetter(clusterNode *n, sds s) { + return sdscatprintf(s, "%d", n->tls_port); +} + +int auxTlsPortPresent(clusterNode *n) { + return n->tls_port >= 0 && n->tls_port < 65536; +} + +/* clusterLink send queue blocks */ +typedef struct { + size_t totlen; /* Total length of this block including the message */ + int refcount; /* Number of cluster link send msg queues containing the message */ + clusterMsg msg; +} clusterMsgSendBlock; + +/* ----------------------------------------------------------------------------- + * Initialization + * -------------------------------------------------------------------------- */ + +/* Load the cluster config from 'filename'. 
+ * + * If the file does not exist or is zero-length (this may happen because + * when we lock the nodes.conf file, we create a zero-length one for the + * sake of locking if it does not already exist), C_ERR is returned. + * If the configuration was loaded from the file, C_OK is returned. */ +int clusterLoadConfig(char *filename) { + FILE *fp = fopen(filename,"r"); + struct stat sb; + char *line; + int maxline, j; + + if (fp == NULL) { + if (errno == ENOENT) { + return C_ERR; + } else { + serverLog(LL_WARNING, + "Loading the cluster node config from %s: %s", + filename, strerror(errno)); + exit(1); + } + } + + if (redis_fstat(fileno(fp),&sb) == -1) { + serverLog(LL_WARNING, + "Unable to obtain the cluster node config file stat %s: %s", + filename, strerror(errno)); + exit(1); + } + /* Check if the file is zero-length: if so return C_ERR to signal + * we have to write the config. */ + if (sb.st_size == 0) { + fclose(fp); + return C_ERR; + } + + /* Parse the file. Note that single lines of the cluster config file can + * be really long as they include all the hash slots of the node. + * This means in the worst possible case, half of the Redis slots will be + * present in a single line, possibly in importing or migrating state, so + * together with the node ID of the sender/receiver. + * + * To simplify we allocate 1024+CLUSTER_SLOTS*128 bytes per line. */ + maxline = 1024+CLUSTER_SLOTS*128; + line = zmalloc(maxline); + while(fgets(line,maxline,fp) != NULL) { + int argc, aux_argc; + sds *argv, *aux_argv; + clusterNode *n, *master; + char *p, *s; + + /* Skip blank lines, they can be created either by users manually + * editing nodes.conf or by the config writing process if stopped + * before the truncate() call. */ + if (line[0] == '\n' || line[0] == '\0') continue; + + /* Split the line into arguments for processing. */ + argv = sdssplitargs(line,&argc); + if (argv == NULL) goto fmterr; + + /* Handle the special "vars" line. 
Don't pretend it is the last + * line even if it actually is when generated by Redis. */ + if (strcasecmp(argv[0],"vars") == 0) { + if (!(argc % 2)) goto fmterr; + for (j = 1; j < argc; j += 2) { + if (strcasecmp(argv[j],"currentEpoch") == 0) { + server.cluster->currentEpoch = + strtoull(argv[j+1],NULL,10); + } else if (strcasecmp(argv[j],"lastVoteEpoch") == 0) { + server.cluster->lastVoteEpoch = + strtoull(argv[j+1],NULL,10); + } else { + serverLog(LL_NOTICE, + "Skipping unknown cluster config variable '%s'", + argv[j]); + } + } + sdsfreesplitres(argv,argc); + continue; + } + + /* Regular config lines have at least eight fields */ + if (argc < 8) { + sdsfreesplitres(argv,argc); + goto fmterr; + } + + /* Create this node if it does not exist */ + if (verifyClusterNodeId(argv[0], sdslen(argv[0])) == C_ERR) { + sdsfreesplitres(argv, argc); + goto fmterr; + } + n = clusterLookupNode(argv[0], sdslen(argv[0])); + if (!n) { + n = createClusterNode(argv[0],0); + clusterAddNode(n); + } + /* Format for the node address and auxiliary argument information: + * ip:port[@cport][,hostname][,aux=val]*] */ + + aux_argv = sdssplitlen(argv[1], sdslen(argv[1]), ",", 1, &aux_argc); + if (aux_argv == NULL) { + sdsfreesplitres(argv,argc); + goto fmterr; + } + + /* Hostname is an optional argument that defines the endpoint + * that can be reported to clients instead of IP. */ + if (aux_argc > 1 && sdslen(aux_argv[1]) > 0) { + n->hostname = sdscpy(n->hostname, aux_argv[1]); + } else if (sdslen(n->hostname) != 0) { + sdsclear(n->hostname); + } + + /* All fields after hostname are auxiliary and they take on + * the format of "aux=val" where both aux and val can contain + * characters that pass the isValidAuxChar check only. The order + * of the aux fields is insignificant. 
*/ + int aux_tcp_port = 0; + int aux_tls_port = 0; + for (int i = 2; i < aux_argc; i++) { + int field_argc; + sds *field_argv; + field_argv = sdssplitlen(aux_argv[i], sdslen(aux_argv[i]), "=", 1, &field_argc); + if (field_argv == NULL || field_argc != 2) { + /* Invalid aux field format */ + if (field_argv != NULL) sdsfreesplitres(field_argv, field_argc); + sdsfreesplitres(aux_argv, aux_argc); + sdsfreesplitres(argv,argc); + goto fmterr; + } + + /* Validate that both aux and value contain valid characters only */ + for (unsigned j = 0; j < 2; j++) { + if (!isValidAuxString(field_argv[j],sdslen(field_argv[j]))){ + /* Invalid aux field format */ + sdsfreesplitres(field_argv, field_argc); + sdsfreesplitres(aux_argv, aux_argc); + sdsfreesplitres(argv,argc); + goto fmterr; + } + } + + /* Note that we don't expect lots of aux fields in the foreseeable + * future so a linear search is completely fine. */ + int field_found = 0; + for (unsigned j = 0; j < numElements(auxFieldHandlers); j++) { + if (sdslen(field_argv[0]) != strlen(auxFieldHandlers[j].field) || + memcmp(field_argv[0], auxFieldHandlers[j].field, sdslen(field_argv[0])) != 0) { + continue; + } + field_found = 1; + aux_tcp_port |= j == af_tcp_port; + aux_tls_port |= j == af_tls_port; + if (auxFieldHandlers[j].setter(n, field_argv[1], sdslen(field_argv[1])) != C_OK) { + /* Invalid aux field format */ + sdsfreesplitres(field_argv, field_argc); + sdsfreesplitres(aux_argv, aux_argc); + sdsfreesplitres(argv,argc); + goto fmterr; + } + } + + if (field_found == 0) { + /* Invalid aux field format */ + sdsfreesplitres(field_argv, field_argc); + sdsfreesplitres(aux_argv, aux_argc); + sdsfreesplitres(argv,argc); + goto fmterr; + } + + sdsfreesplitres(field_argv, field_argc); + } + /* Address and port */ + if ((p = strrchr(aux_argv[0],':')) == NULL) { + sdsfreesplitres(aux_argv, aux_argc); + sdsfreesplitres(argv,argc); + goto fmterr; + } + *p = '\0'; + memcpy(n->ip,aux_argv[0],strlen(aux_argv[0])+1); + char *port = p+1; + 
char *busp = strchr(port,'@'); + if (busp) { + *busp = '\0'; + busp++; + } + /* If neither TCP or TLS port is found in aux field, it is considered + * an old version of nodes.conf file.*/ + if (!aux_tcp_port && !aux_tls_port) { + if (server.tls_cluster) { + n->tls_port = atoi(port); + } else { + n->tcp_port = atoi(port); + } + } else if (!aux_tcp_port) { + n->tcp_port = atoi(port); + } else if (!aux_tls_port) { + n->tls_port = atoi(port); + } + /* In older versions of nodes.conf the "@busport" part is missing. + * In this case we set it to the default offset of 10000 from the + * base port. */ + n->cport = busp ? atoi(busp) : (getNodeDefaultClientPort(n) + CLUSTER_PORT_INCR); + + /* The plaintext port for client in a TLS cluster (n->pport) is not + * stored in nodes.conf. It is received later over the bus protocol. */ + + sdsfreesplitres(aux_argv, aux_argc); + + /* Parse flags */ + p = s = argv[2]; + while(p) { + p = strchr(s,','); + if (p) *p = '\0'; + if (!strcasecmp(s,"myself")) { + serverAssert(server.cluster->myself == NULL); + myself = server.cluster->myself = n; + n->flags |= CLUSTER_NODE_MYSELF; + } else if (!strcasecmp(s,"master")) { + n->flags |= CLUSTER_NODE_MASTER; + } else if (!strcasecmp(s,"slave")) { + n->flags |= CLUSTER_NODE_SLAVE; + } else if (!strcasecmp(s,"fail?")) { + n->flags |= CLUSTER_NODE_PFAIL; + } else if (!strcasecmp(s,"fail")) { + n->flags |= CLUSTER_NODE_FAIL; + n->fail_time = mstime(); + } else if (!strcasecmp(s,"handshake")) { + n->flags |= CLUSTER_NODE_HANDSHAKE; + } else if (!strcasecmp(s,"noaddr")) { + n->flags |= CLUSTER_NODE_NOADDR; + } else if (!strcasecmp(s,"nofailover")) { + n->flags |= CLUSTER_NODE_NOFAILOVER; + } else if (!strcasecmp(s,"noflags")) { + /* nothing to do */ + } else { + serverPanic("Unknown flag in redis cluster config file"); + } + if (p) s = p+1; + } + + /* Get master if any. Set the master and populate master's + * slave list. 
*/ + if (argv[3][0] != '-') { + if (verifyClusterNodeId(argv[3], sdslen(argv[3])) == C_ERR) { + sdsfreesplitres(argv, argc); + goto fmterr; + } + master = clusterLookupNode(argv[3], sdslen(argv[3])); + if (!master) { + master = createClusterNode(argv[3],0); + clusterAddNode(master); + } + /* shard_id can be absent if we are loading a nodes.conf generated + * by an older version of Redis; we should follow the primary's + * shard_id in this case */ + if (auxFieldHandlers[af_shard_id].isPresent(n) == 0) { + memcpy(n->shard_id, master->shard_id, CLUSTER_NAMELEN); + clusterAddNodeToShard(master->shard_id, n); + } else if (clusterGetNodesInMyShard(master) != NULL && + memcmp(master->shard_id, n->shard_id, CLUSTER_NAMELEN) != 0) + { + /* If the primary has been added to a shard, make sure this + * node has the same persisted shard id as the primary. */ + goto fmterr; + } + n->slaveof = master; + clusterNodeAddSlave(master,n); + } else if (auxFieldHandlers[af_shard_id].isPresent(n) == 0) { + /* n is a primary but it does not have a persisted shard_id. + * This happens if we are loading a nodes.conf generated by + * an older version of Redis. We should manually update the + * shard membership in this case */ + clusterAddNodeToShard(n->shard_id, n); + } + + /* Set ping sent / pong received timestamps */ + if (atoi(argv[4])) n->ping_sent = mstime(); + if (atoi(argv[5])) n->pong_received = mstime(); + + /* Set configEpoch for this node. + * If the node is a replica, set its config epoch to 0. + * If it's a primary, load the config epoch from the configuration file. */ + n->configEpoch = (nodeIsSlave(n) && n->slaveof) ? 0 : strtoull(argv[6],NULL,10); + + /* Populate hash slots served by this instance. 
*/ + for (j = 8; j < argc; j++) { + int start, stop; + + if (argv[j][0] == '[') { + /* Here we handle migrating / importing slots */ + int slot; + char direction; + clusterNode *cn; + + p = strchr(argv[j],'-'); + serverAssert(p != NULL); + *p = '\0'; + direction = p[1]; /* Either '>' or '<' */ + slot = atoi(argv[j]+1); + if (slot < 0 || slot >= CLUSTER_SLOTS) { + sdsfreesplitres(argv,argc); + goto fmterr; + } + p += 3; + + char *pr = strchr(p, ']'); + size_t node_len = pr - p; + if (pr == NULL || verifyClusterNodeId(p, node_len) == C_ERR) { + sdsfreesplitres(argv, argc); + goto fmterr; + } + cn = clusterLookupNode(p, CLUSTER_NAMELEN); + if (!cn) { + cn = createClusterNode(p,0); + clusterAddNode(cn); + } + if (direction == '>') { + server.cluster->migrating_slots_to[slot] = cn; + } else { + server.cluster->importing_slots_from[slot] = cn; + } + continue; + } else if ((p = strchr(argv[j],'-')) != NULL) { + *p = '\0'; + start = atoi(argv[j]); + stop = atoi(p+1); + } else { + start = stop = atoi(argv[j]); + } + if (start < 0 || start >= CLUSTER_SLOTS || + stop < 0 || stop >= CLUSTER_SLOTS) + { + sdsfreesplitres(argv,argc); + goto fmterr; + } + while(start <= stop) clusterAddSlot(n, start++); + } + + sdsfreesplitres(argv,argc); + } + /* Config sanity check */ + if (server.cluster->myself == NULL) goto fmterr; + + zfree(line); + fclose(fp); + + serverLog(LL_NOTICE,"Node configuration loaded, I'm %.40s", myself->name); + + /* Something that should never happen: currentEpoch smaller than + * the max epoch found in the nodes configuration. However we handle this + * as some form of protection against manual editing of critical files. 
*/ + if (clusterGetMaxEpoch() > server.cluster->currentEpoch) { + server.cluster->currentEpoch = clusterGetMaxEpoch(); + } + return C_OK; + +fmterr: + serverLog(LL_WARNING, + "Unrecoverable error: corrupted cluster config file \"%s\".", line); + zfree(line); + if (fp) fclose(fp); + exit(1); +} + +/* Cluster node configuration is exactly the same as CLUSTER NODES output. + * + * This function writes the node config and returns 0, on error -1 + * is returned. + * + * Note: we need to write the file in an atomic way from the point of view + * of the POSIX filesystem semantics, so that if the server is stopped + * or crashes during the write, we'll end with either the old file or the + * new one. Since we have the full payload to write available we can use + * a single write to write the whole file. If the pre-existing file was + * bigger we pad our payload with newlines that are anyway ignored and truncate + * the file afterward. */ +int clusterSaveConfig(int do_fsync) { + sds ci,tmpfilename; + size_t content_size,offset = 0; + ssize_t written_bytes; + int fd = -1; + int retval = C_ERR; + + server.cluster->todo_before_sleep &= ~CLUSTER_TODO_SAVE_CONFIG; + + /* Get the nodes description and concatenate our "vars" directive to + * save currentEpoch and lastVoteEpoch. */ + ci = clusterGenNodesDescription(NULL, CLUSTER_NODE_HANDSHAKE, 0); + ci = sdscatprintf(ci,"vars currentEpoch %llu lastVoteEpoch %llu\n", + (unsigned long long) server.cluster->currentEpoch, + (unsigned long long) server.cluster->lastVoteEpoch); + content_size = sdslen(ci); + + /* Create a temp file with the new content. 
*/ + tmpfilename = sdscatfmt(sdsempty(),"%s.tmp-%i-%I", + server.cluster_configfile,(int) getpid(),mstime()); + if ((fd = open(tmpfilename,O_WRONLY|O_CREAT,0644)) == -1) { + serverLog(LL_WARNING,"Could not open temp cluster config file: %s",strerror(errno)); + goto cleanup; + } + + while (offset < content_size) { + written_bytes = write(fd,ci + offset,content_size - offset); + if (written_bytes <= 0) { + if (errno == EINTR) continue; + serverLog(LL_WARNING,"Failed after writing (%zd) bytes to tmp cluster config file: %s", + offset,strerror(errno)); + goto cleanup; + } + offset += written_bytes; + } + + if (do_fsync) { + server.cluster->todo_before_sleep &= ~CLUSTER_TODO_FSYNC_CONFIG; + if (redis_fsync(fd) == -1) { + serverLog(LL_WARNING,"Could not sync tmp cluster config file: %s",strerror(errno)); + goto cleanup; + } + } + + if (rename(tmpfilename, server.cluster_configfile) == -1) { + serverLog(LL_WARNING,"Could not rename tmp cluster config file: %s",strerror(errno)); + goto cleanup; + } + + if (do_fsync) { + if (fsyncFileDir(server.cluster_configfile) == -1) { + serverLog(LL_WARNING,"Could not sync cluster config file dir: %s",strerror(errno)); + goto cleanup; + } + } + retval = C_OK; /* If we reached this point, everything is fine. */ + +cleanup: + if (fd != -1) close(fd); + if (retval) unlink(tmpfilename); + sdsfree(tmpfilename); + sdsfree(ci); + return retval; +} + +void clusterSaveConfigOrDie(int do_fsync) { + if (clusterSaveConfig(do_fsync) == -1) { + serverLog(LL_WARNING,"Fatal: can't update cluster config file."); + exit(1); + } +} + +/* Lock the cluster config using flock(), and retain the file descriptor used to + * acquire the lock so that the file will be locked as long as the process is up. + * + * This works because we always update nodes.conf with a new version + * in-place, reopening the file, and writing to it in place (later adjusting + * the length with ftruncate()). 
+ * + * On success C_OK is returned, otherwise an error is logged and + * the function returns C_ERR to signal a lock was not acquired. */ +int clusterLockConfig(char *filename) { +/* flock() does not exist on Solaris + * and a fcntl-based solution won't help, as we constantly re-open that file, + * which will release _all_ locks anyway + */ +#if !defined(__sun) + /* To lock it, we need to open the file in a way it is created if + * it does not exist, otherwise there is a race condition with other + * processes. */ + int fd = open(filename,O_WRONLY|O_CREAT|O_CLOEXEC,0644); + if (fd == -1) { + serverLog(LL_WARNING, + "Can't open %s in order to acquire a lock: %s", + filename, strerror(errno)); + return C_ERR; + } + + if (flock(fd,LOCK_EX|LOCK_NB) == -1) { + if (errno == EWOULDBLOCK) { + serverLog(LL_WARNING, + "Sorry, the cluster configuration file %s is already used " + "by a different Redis Cluster node. Please make sure that " + "different nodes use different cluster configuration " + "files.", filename); + } else { + serverLog(LL_WARNING, + "Impossible to lock %s: %s", filename, strerror(errno)); + } + close(fd); + return C_ERR; + } + /* Lock acquired: leak the 'fd' by not closing it until shutdown time, so that + * we'll retain the lock to the file as long as the process exists. + * + * After fork, the child process will get the fd opened by the parent process, + * we need save `fd` to `cluster_config_file_lock_fd`, so that in redisFork(), + * it will be closed in the child process. + * If it is not closed, when the main process is killed -9, but the child process + * (redis-aof-rewrite) is still alive, the fd(lock) will still be held by the + * child process, and the main process will fail to get lock, means fail to start. */ + server.cluster_config_file_lock_fd = fd; +#else + UNUSED(filename); +#endif /* __sun */ + + return C_OK; +} + +/* Derives our ports to be announced in the cluster bus. 
*/ +void deriveAnnouncedPorts(int *announced_tcp_port, int *announced_tls_port, + int *announced_cport) { + /* Config overriding announced ports. */ + *announced_tcp_port = server.cluster_announce_port ? + server.cluster_announce_port : server.port; + *announced_tls_port = server.cluster_announce_tls_port ? + server.cluster_announce_tls_port : server.tls_port; + /* Derive cluster bus port. */ + if (server.cluster_announce_bus_port) { + *announced_cport = server.cluster_announce_bus_port; + } else if (server.cluster_port) { + *announced_cport = server.cluster_port; + } else { + *announced_cport = defaultClientPort() + CLUSTER_PORT_INCR; + } +} + +/* Some flags (currently just the NOFAILOVER flag) may need to be updated + * in the "myself" node based on the current configuration of the node, + * that may change at runtime via CONFIG SET. This function changes the + * set of flags in myself->flags accordingly. */ +void clusterUpdateMyselfFlags(void) { + if (!myself) return; + int oldflags = myself->flags; + int nofailover = server.cluster_slave_no_failover ? + CLUSTER_NODE_NOFAILOVER : 0; + myself->flags &= ~CLUSTER_NODE_NOFAILOVER; + myself->flags |= nofailover; + if (myself->flags != oldflags) { + clusterDoBeforeSleep(CLUSTER_TODO_SAVE_CONFIG| + CLUSTER_TODO_UPDATE_STATE); + } +} + + +/* We want to take myself->port/cport/pport in sync with the +* cluster-announce-port/cluster-announce-bus-port/cluster-announce-tls-port option. +* The option can be set at runtime via CONFIG SET. */ +void clusterUpdateMyselfAnnouncedPorts(void) { + if (!myself) return; + deriveAnnouncedPorts(&myself->tcp_port,&myself->tls_port,&myself->cport); +} + +/* We want to take myself->ip in sync with the cluster-announce-ip option. +* The option can be set at runtime via CONFIG SET. 
*/ +void clusterUpdateMyselfIp(void) { + if (!myself) return; + static char *prev_ip = NULL; + char *curr_ip = server.cluster_announce_ip; + int changed = 0; + + if (prev_ip == NULL && curr_ip != NULL) changed = 1; + else if (prev_ip != NULL && curr_ip == NULL) changed = 1; + else if (prev_ip && curr_ip && strcmp(prev_ip,curr_ip)) changed = 1; + + if (changed) { + if (prev_ip) zfree(prev_ip); + prev_ip = curr_ip; + + if (curr_ip) { + /* We always take a copy of the previous IP address, by + * duplicating the string. This way later we can check if + * the address really changed. */ + prev_ip = zstrdup(prev_ip); + redis_strlcpy(myself->ip,server.cluster_announce_ip,NET_IP_STR_LEN); + } else { + myself->ip[0] = '\0'; /* Force autodetection. */ + } + } +} + +/* Update the hostname for the specified node with the provided C string. */ +static void updateAnnouncedHostname(clusterNode *node, char *new) { + /* Previous and new hostname are the same, no need to update. */ + if (new && !strcmp(new, node->hostname)) { + return; + } else if (!new && (sdslen(node->hostname) == 0)) { + return; + } + + if (new) { + node->hostname = sdscpy(node->hostname, new); + } else if (sdslen(node->hostname) != 0) { + sdsclear(node->hostname); + } + clusterDoBeforeSleep(CLUSTER_TODO_SAVE_CONFIG); +} + +static void updateAnnouncedHumanNodename(clusterNode *node, char *new) { + if (new && !strcmp(new, node->human_nodename)) { + return; + } else if (!new && (sdslen(node->human_nodename) == 0)) { + return; + } + + if (new) { + node->human_nodename = sdscpy(node->human_nodename, new); + } else if (sdslen(node->human_nodename) != 0) { + sdsclear(node->human_nodename); + } + clusterDoBeforeSleep(CLUSTER_TODO_SAVE_CONFIG); +} + + +static void updateShardId(clusterNode *node, const char *shard_id) { + if (shard_id && memcmp(node->shard_id, shard_id, CLUSTER_NAMELEN) != 0) { + clusterRemoveNodeFromShard(node); + memcpy(node->shard_id, shard_id, CLUSTER_NAMELEN); + clusterAddNodeToShard(shard_id, node); 
+ clusterDoBeforeSleep(CLUSTER_TODO_SAVE_CONFIG); + } + if (shard_id && myself != node && myself->slaveof == node) { + if (memcmp(myself->shard_id, shard_id, CLUSTER_NAMELEN) != 0) { + /* shard-id can diverge right after a rolling upgrade + * from pre-7.2 releases */ + clusterRemoveNodeFromShard(myself); + memcpy(myself->shard_id, shard_id, CLUSTER_NAMELEN); + clusterAddNodeToShard(shard_id, myself); + clusterDoBeforeSleep(CLUSTER_TODO_SAVE_CONFIG|CLUSTER_TODO_FSYNC_CONFIG); + } + } +} + +/* Update my hostname based on server configuration values */ +void clusterUpdateMyselfHostname(void) { + if (!myself) return; + updateAnnouncedHostname(myself, server.cluster_announce_hostname); +} + +void clusterUpdateMyselfHumanNodename(void) { + if (!myself) return; + updateAnnouncedHumanNodename(myself, server.cluster_announce_human_nodename); +} + +void clusterInit(void) { + int saveconf = 0; + + server.cluster = zmalloc(sizeof(struct clusterState)); + server.cluster->myself = NULL; + server.cluster->currentEpoch = 0; + server.cluster->state = CLUSTER_FAIL; + server.cluster->size = 0; + server.cluster->todo_before_sleep = 0; + server.cluster->nodes = dictCreate(&clusterNodesDictType); + server.cluster->shards = dictCreate(&clusterSdsToListType); + server.cluster->nodes_black_list = + dictCreate(&clusterNodesBlackListDictType); + server.cluster->failover_auth_time = 0; + server.cluster->failover_auth_count = 0; + server.cluster->failover_auth_rank = 0; + server.cluster->failover_auth_epoch = 0; + server.cluster->cant_failover_reason = CLUSTER_CANT_FAILOVER_NONE; + server.cluster->lastVoteEpoch = 0; + + /* Initialize stats */ + for (int i = 0; i < CLUSTERMSG_TYPE_COUNT; i++) { + server.cluster->stats_bus_messages_sent[i] = 0; + server.cluster->stats_bus_messages_received[i] = 0; + } + server.cluster->stats_pfail_nodes = 0; + server.cluster->stat_cluster_links_buffer_limit_exceeded = 0; + + memset(server.cluster->slots,0, sizeof(server.cluster->slots)); + 
clusterCloseAllSlots(); + + memset(server.cluster->owner_not_claiming_slot, 0, sizeof(server.cluster->owner_not_claiming_slot)); + + /* Lock the cluster config file to make sure every node uses + * its own nodes.conf. */ + server.cluster_config_file_lock_fd = -1; + if (clusterLockConfig(server.cluster_configfile) == C_ERR) + exit(1); + + /* Load or create a new nodes configuration. */ + if (clusterLoadConfig(server.cluster_configfile) == C_ERR) { + /* No configuration found. We will just use the random name provided + * by the createClusterNode() function. */ + myself = server.cluster->myself = + createClusterNode(NULL,CLUSTER_NODE_MYSELF|CLUSTER_NODE_MASTER); + serverLog(LL_NOTICE,"No cluster configuration found, I'm %.40s", + myself->name); + clusterAddNode(myself); + clusterAddNodeToShard(myself->shard_id, myself); + saveconf = 1; + } + if (saveconf) clusterSaveConfigOrDie(1); + + /* Port sanity check II + * The other handshake port check is triggered too late to stop + * us from trying to use a too-high cluster port number. */ + int port = defaultClientPort(); + if (!server.cluster_port && port > (65535-CLUSTER_PORT_INCR)) { + serverLog(LL_WARNING, "Redis port number too high. " + "Cluster communication port is 10,000 port " + "numbers higher than your Redis port. " + "Your Redis port number must be 55535 or less."); + exit(1); + } + if (!server.bindaddr_count) { + serverLog(LL_WARNING, "No bind address is configured, but it is required for the Cluster bus."); + exit(1); + } + + /* Set myself->port/cport/pport to my listening ports, we'll just need to + * discover the IP address via MEET messages. 
*/ + deriveAnnouncedPorts(&myself->tcp_port, &myself->tls_port, &myself->cport); + + server.cluster->mf_end = 0; + server.cluster->mf_slave = NULL; + resetManualFailover(); + clusterUpdateMyselfFlags(); + clusterUpdateMyselfIp(); + clusterUpdateMyselfHostname(); + clusterUpdateMyselfHumanNodename(); +} + +void clusterInitLast(void) { + if (connectionIndexByType(connTypeOfCluster()->get_type(NULL)) < 0) { + serverLog(LL_WARNING, "Missing connection type %s, but it is required for the Cluster bus.", connTypeOfCluster()->get_type(NULL)); + exit(1); + } + + int port = defaultClientPort(); + connListener *listener = &server.clistener; + listener->count = 0; + listener->bindaddr = server.bindaddr; + listener->bindaddr_count = server.bindaddr_count; + listener->port = server.cluster_port ? server.cluster_port : port + CLUSTER_PORT_INCR; + listener->ct = connTypeOfCluster(); + if (connListen(listener) == C_ERR ) { + /* Note: the following log text is matched by the test suite. */ + serverLog(LL_WARNING, "Failed listening on port %u (cluster), aborting.", listener->port); + exit(1); + } + + if (createSocketAcceptHandler(&server.clistener, clusterAcceptHandler) != C_OK) { + serverPanic("Unrecoverable error creating Redis Cluster socket accept handler."); + } +} + +/* Reset a node performing a soft or hard reset: + * + * 1) All other nodes are forgotten. + * 2) All the assigned / open slots are released. + * 3) If the node is a slave, it turns into a master. + * 4) Only for hard reset: a new Node ID is generated. + * 5) Only for hard reset: currentEpoch and configEpoch are set to 0. + * 6) The new configuration is saved and the cluster state updated. + * 7) If the node was a slave, the whole data set is flushed away. */ +void clusterReset(int hard) { + dictIterator *di; + dictEntry *de; + int j; + + /* Turn into master. 
*/ + if (nodeIsSlave(myself)) { + clusterSetNodeAsMaster(myself); + replicationUnsetMaster(); + emptyData(-1,EMPTYDB_NO_FLAGS,NULL); + } + + /* Close slots, reset manual failover state. */ + clusterCloseAllSlots(); + resetManualFailover(); + + /* Unassign all the slots. */ + for (j = 0; j < CLUSTER_SLOTS; j++) clusterDelSlot(j); + + /* Recreate shards dict */ + dictEmpty(server.cluster->shards, NULL); + + /* Forget all the nodes, but myself. */ + di = dictGetSafeIterator(server.cluster->nodes); + while((de = dictNext(di)) != NULL) { + clusterNode *node = dictGetVal(de); + + if (node == myself) continue; + clusterDelNode(node); + } + dictReleaseIterator(di); + + /* Empty the nodes blacklist. */ + dictEmpty(server.cluster->nodes_black_list, NULL); + + /* Hard reset only: set epochs to 0, change node ID. */ + if (hard) { + sds oldname; + + server.cluster->currentEpoch = 0; + server.cluster->lastVoteEpoch = 0; + myself->configEpoch = 0; + serverLog(LL_NOTICE, "configEpoch set to 0 via CLUSTER RESET HARD"); + + /* To change the Node ID we need to remove the old name from the + * nodes table, change the ID, and re-add back with new name. */ + oldname = sdsnewlen(myself->name, CLUSTER_NAMELEN); + dictDelete(server.cluster->nodes,oldname); + sdsfree(oldname); + getRandomHexChars(myself->name, CLUSTER_NAMELEN); + getRandomHexChars(myself->shard_id, CLUSTER_NAMELEN); + clusterAddNode(myself); + serverLog(LL_NOTICE,"Node hard reset, now I'm %.40s", myself->name); + } + + /* Re-populate shards */ + clusterAddNodeToShard(myself->shard_id, myself); + + /* Make sure to persist the new config and update the state. 
*/ + clusterDoBeforeSleep(CLUSTER_TODO_SAVE_CONFIG| + CLUSTER_TODO_UPDATE_STATE| + CLUSTER_TODO_FSYNC_CONFIG); +} + +/* ----------------------------------------------------------------------------- + * CLUSTER communication link + * -------------------------------------------------------------------------- */ +static clusterMsgSendBlock *createClusterMsgSendBlock(int type, uint32_t msglen) { + uint32_t blocklen = msglen + sizeof(clusterMsgSendBlock) - sizeof(clusterMsg); + clusterMsgSendBlock *msgblock = zcalloc(blocklen); + msgblock->refcount = 1; + msgblock->totlen = blocklen; + server.stat_cluster_links_memory += blocklen; + clusterBuildMessageHdr(&msgblock->msg,type,msglen); + return msgblock; +} + +static void clusterMsgSendBlockDecrRefCount(void *node) { + clusterMsgSendBlock *msgblock = (clusterMsgSendBlock*)node; + msgblock->refcount--; + serverAssert(msgblock->refcount >= 0); + if (msgblock->refcount == 0) { + server.stat_cluster_links_memory -= msgblock->totlen; + zfree(msgblock); + } +} + +clusterLink *createClusterLink(clusterNode *node) { + clusterLink *link = zmalloc(sizeof(*link)); + link->ctime = mstime(); + link->send_msg_queue = listCreate(); + listSetFreeMethod(link->send_msg_queue, clusterMsgSendBlockDecrRefCount); + link->head_msg_send_offset = 0; + link->send_msg_queue_mem = sizeof(list); + link->rcvbuf = zmalloc(link->rcvbuf_alloc = RCVBUF_INIT_LEN); + link->rcvbuf_len = 0; + server.stat_cluster_links_memory += link->rcvbuf_alloc + link->send_msg_queue_mem; + link->conn = NULL; + link->node = node; + /* Related node can only possibly be known at link creation time if this is an outbound link */ + link->inbound = (node == NULL); + if (!link->inbound) { + node->link = link; + } + return link; +} + +/* Free a cluster link, but does not free the associated node of course. + * This function will just make sure that the original node associated + * with this link will have the 'link' field set to NULL. 
*/ +void freeClusterLink(clusterLink *link) { + if (link->conn) { + connClose(link->conn); + link->conn = NULL; + } + server.stat_cluster_links_memory -= sizeof(list) + listLength(link->send_msg_queue)*sizeof(listNode); + listRelease(link->send_msg_queue); + server.stat_cluster_links_memory -= link->rcvbuf_alloc; + zfree(link->rcvbuf); + if (link->node) { + if (link->node->link == link) { + serverAssert(!link->inbound); + link->node->link = NULL; + } else if (link->node->inbound_link == link) { + serverAssert(link->inbound); + link->node->inbound_link = NULL; + } + } + zfree(link); +} + +void setClusterNodeToInboundClusterLink(clusterNode *node, clusterLink *link) { + serverAssert(!link->node); + serverAssert(link->inbound); + if (node->inbound_link) { + /* A peer may disconnect and then reconnect with us, and it's not guaranteed that + * we would always process the disconnection of the existing inbound link before + * accepting a new existing inbound link. Therefore, it's possible to have more than + * one inbound link from the same node at the same time. Our cleanup logic assumes + * a one to one relationship between nodes and inbound links, so we need to kill + * one of the links. The existing link is more likely the outdated one, but it's + * possible the other node may need to open another link. */ + serverLog(LL_DEBUG, "Replacing inbound link fd %d from node %.40s with fd %d", + node->inbound_link->conn->fd, node->name, link->conn->fd); + freeClusterLink(node->inbound_link); + } + serverAssert(!node->inbound_link); + node->inbound_link = link; + link->node = node; +} + +static void clusterConnAcceptHandler(connection *conn) { + clusterLink *link; + + if (connGetState(conn) != CONN_STATE_CONNECTED) { + serverLog(LL_VERBOSE, + "Error accepting cluster node connection: %s", connGetLastError(conn)); + connClose(conn); + return; + } + + /* Create a link object we use to handle the connection. + * It gets passed to the readable handler when data is available. 
+ * Initially the link->node pointer is set to NULL as we don't know + * which node it is, but the right node is referenced once we know the + * node identity. */ + link = createClusterLink(NULL); + link->conn = conn; + connSetPrivateData(conn, link); + + /* Register read handler */ + connSetReadHandler(conn, clusterReadHandler); +} + +#define MAX_CLUSTER_ACCEPTS_PER_CALL 1000 +void clusterAcceptHandler(aeEventLoop *el, int fd, void *privdata, int mask) { + int cport, cfd; + int max = MAX_CLUSTER_ACCEPTS_PER_CALL; + char cip[NET_IP_STR_LEN]; + int require_auth = TLS_CLIENT_AUTH_YES; + UNUSED(el); + UNUSED(mask); + UNUSED(privdata); + + /* If the server is starting up, don't accept cluster connections: + * UPDATE messages may interact with the database content. */ + if (server.masterhost == NULL && server.loading) return; + + while(max--) { + cfd = anetTcpAccept(server.neterr, fd, cip, sizeof(cip), &cport); + if (cfd == ANET_ERR) { + if (errno != EWOULDBLOCK) + serverLog(LL_VERBOSE, + "Error accepting cluster node: %s", server.neterr); + return; + } + + connection *conn = connCreateAccepted(connTypeOfCluster(), cfd, &require_auth); + + /* Make sure connection is not in an error state */ + if (connGetState(conn) != CONN_STATE_ACCEPTING) { + serverLog(LL_VERBOSE, + "Error creating an accepting connection for cluster node: %s", + connGetLastError(conn)); + connClose(conn); + return; + } + connEnableTcpNoDelay(conn); + connKeepAlive(conn,server.cluster_node_timeout / 1000 * 2); + + /* Use non-blocking I/O for cluster messages. */ + serverLog(LL_VERBOSE,"Accepting cluster node connection from %s:%d", cip, cport); + + /* Accept the connection now. connAccept() may call our handler directly + * or schedule it for later depending on connection implementation. 
+ */ + if (connAccept(conn, clusterConnAcceptHandler) == C_ERR) { + if (connGetState(conn) == CONN_STATE_ERROR) + serverLog(LL_VERBOSE, + "Error accepting cluster node connection: %s", + connGetLastError(conn)); + connClose(conn); + return; + } + } +} + +/* Return the approximated number of sockets we are using in order to + * take the cluster bus connections. */ +unsigned long getClusterConnectionsCount(void) { + /* We decrement the number of nodes by one, since there is the + * "myself" node too in the list. Each node uses two file descriptors, + * one incoming and one outgoing, thus the multiplication by 2. */ + return server.cluster_enabled ? + ((dictSize(server.cluster->nodes)-1)*2) : 0; +} + +/* ----------------------------------------------------------------------------- + * CLUSTER node API + * -------------------------------------------------------------------------- */ + +/* Create a new cluster node, with the specified flags. + * If "nodename" is NULL this is considered a first handshake and a random + * node name is assigned to this node (it will be fixed later when we'll + * receive the first pong). + * + * The node is created and returned to the user, but it is not automatically + * added to the nodes hash table. 
*/ +clusterNode *createClusterNode(char *nodename, int flags) { + clusterNode *node = zmalloc(sizeof(*node)); + + if (nodename) + memcpy(node->name, nodename, CLUSTER_NAMELEN); + else + getRandomHexChars(node->name, CLUSTER_NAMELEN); + getRandomHexChars(node->shard_id, CLUSTER_NAMELEN); + node->ctime = mstime(); + node->configEpoch = 0; + node->flags = flags; + memset(node->slots,0,sizeof(node->slots)); + node->slot_info_pairs = NULL; + node->slot_info_pairs_count = 0; + node->numslots = 0; + node->numslaves = 0; + node->slaves = NULL; + node->slaveof = NULL; + node->last_in_ping_gossip = 0; + node->ping_sent = node->pong_received = 0; + node->data_received = 0; + node->fail_time = 0; + node->link = NULL; + node->inbound_link = NULL; + memset(node->ip,0,sizeof(node->ip)); + node->hostname = sdsempty(); + node->human_nodename = sdsempty(); + node->tcp_port = 0; + node->cport = 0; + node->tls_port = 0; + node->fail_reports = listCreate(); + node->voted_time = 0; + node->orphaned_time = 0; + node->repl_offset_time = 0; + node->repl_offset = 0; + listSetFreeMethod(node->fail_reports,zfree); + return node; +} + +/* This function is called every time we get a failure report from a node. + * The side effect is to populate the fail_reports list (or to update + * the timestamp of an existing report). + * + * 'failing' is the node that is in failure state according to the + * 'sender' node. + * + * The function returns 0 if it just updates a timestamp of an existing + * failure report from the same sender. 1 is returned if a new failure + * report is created. */ +int clusterNodeAddFailureReport(clusterNode *failing, clusterNode *sender) { + list *l = failing->fail_reports; + listNode *ln; + listIter li; + clusterNodeFailReport *fr; + + /* If a failure report from the same sender already exists, just update + * the timestamp. 
*/ + listRewind(l,&li); + while ((ln = listNext(&li)) != NULL) { + fr = ln->value; + if (fr->node == sender) { + fr->time = mstime(); + return 0; + } + } + + /* Otherwise create a new report. */ + fr = zmalloc(sizeof(*fr)); + fr->node = sender; + fr->time = mstime(); + listAddNodeTail(l,fr); + return 1; +} + +/* Remove failure reports that are too old, where too old means reasonably + * older than the global node timeout. Note that anyway for a node to be + * flagged as FAIL we need to have a local PFAIL state that is at least + * older than the global node timeout, so we don't just trust the number + * of failure reports from other nodes. */ +void clusterNodeCleanupFailureReports(clusterNode *node) { + list *l = node->fail_reports; + listNode *ln; + listIter li; + clusterNodeFailReport *fr; + mstime_t maxtime = server.cluster_node_timeout * + CLUSTER_FAIL_REPORT_VALIDITY_MULT; + mstime_t now = mstime(); + + listRewind(l,&li); + while ((ln = listNext(&li)) != NULL) { + fr = ln->value; + if (now - fr->time > maxtime) listDelNode(l,ln); + } +} + +/* Remove the failing report for 'node' if it was previously considered + * failing by 'sender'. This function is called when a node informs us via + * gossip that a node is OK from its point of view (no FAIL or PFAIL flags). + * + * Note that this function is called relatively often as it gets called even + * when there are no nodes failing, and is O(N), however when the cluster is + * fine the failure reports list is empty so the function runs in constant + * time. + * + * The function returns 1 if the failure report was found and removed. + * Otherwise 0 is returned. */ +int clusterNodeDelFailureReport(clusterNode *node, clusterNode *sender) { + list *l = node->fail_reports; + listNode *ln; + listIter li; + clusterNodeFailReport *fr; + + /* Search for a failure report from this sender. 
*/ + listRewind(l,&li); + while ((ln = listNext(&li)) != NULL) { + fr = ln->value; + if (fr->node == sender) break; + } + if (!ln) return 0; /* No failure report from this sender. */ + + /* Remove the failure report. */ + listDelNode(l,ln); + clusterNodeCleanupFailureReports(node); + return 1; +} + +/* Return the number of external nodes that believe 'node' is failing, + * not including this node, that may have a PFAIL or FAIL state for this + * node as well. */ +int clusterNodeFailureReportsCount(clusterNode *node) { + clusterNodeCleanupFailureReports(node); + return listLength(node->fail_reports); +} + +int clusterNodeRemoveSlave(clusterNode *master, clusterNode *slave) { + int j; + + for (j = 0; j < master->numslaves; j++) { + if (master->slaves[j] == slave) { + if ((j+1) < master->numslaves) { + int remaining_slaves = (master->numslaves - j) - 1; + memmove(master->slaves+j,master->slaves+(j+1), + (sizeof(*master->slaves) * remaining_slaves)); + } + master->numslaves--; + if (master->numslaves == 0) + master->flags &= ~CLUSTER_NODE_MIGRATE_TO; + return C_OK; + } + } + return C_ERR; +} + +int clusterNodeAddSlave(clusterNode *master, clusterNode *slave) { + int j; + + /* If it's already a slave, don't add it again. */ + for (j = 0; j < master->numslaves; j++) + if (master->slaves[j] == slave) return C_ERR; + master->slaves = zrealloc(master->slaves, + sizeof(clusterNode*)*(master->numslaves+1)); + master->slaves[master->numslaves] = slave; + master->numslaves++; + master->flags |= CLUSTER_NODE_MIGRATE_TO; + return C_OK; +} + +int clusterCountNonFailingSlaves(clusterNode *n) { + int j, okslaves = 0; + + for (j = 0; j < n->numslaves; j++) + if (!nodeFailed(n->slaves[j])) okslaves++; + return okslaves; +} + +/* Low level cleanup of the node structure. Only called by clusterDelNode(). */ +void freeClusterNode(clusterNode *n) { + sds nodename; + int j; + + /* If the node has associated slaves, we have to set + * all the slaves->slaveof fields to NULL (unknown). 
*/ + for (j = 0; j < n->numslaves; j++) + n->slaves[j]->slaveof = NULL; + + /* Remove this node from the list of slaves of its master. */ + if (nodeIsSlave(n) && n->slaveof) clusterNodeRemoveSlave(n->slaveof,n); + + /* Unlink from the set of nodes. */ + nodename = sdsnewlen(n->name, CLUSTER_NAMELEN); + serverAssert(dictDelete(server.cluster->nodes,nodename) == DICT_OK); + sdsfree(nodename); + sdsfree(n->hostname); + sdsfree(n->human_nodename); + + /* Release links and associated data structures. */ + if (n->link) freeClusterLink(n->link); + if (n->inbound_link) freeClusterLink(n->inbound_link); + listRelease(n->fail_reports); + zfree(n->slaves); + zfree(n); +} + +/* Add a node to the nodes hash table */ +void clusterAddNode(clusterNode *node) { + int retval; + + retval = dictAdd(server.cluster->nodes, + sdsnewlen(node->name,CLUSTER_NAMELEN), node); + serverAssert(retval == DICT_OK); +} + +/* Remove a node from the cluster. The function performs the high level + * cleanup, calling freeClusterNode() for the low level cleanup. + * Here we do the following: + * + * 1) Mark all the slots handled by it as unassigned. + * 2) Remove all the failure reports sent by this node and referenced by + * other nodes. + * 3) Remove the node from the owning shard + * 4) Free the node with freeClusterNode() that will in turn remove it + * from the hash table and from the list of slaves of its master, if + * it is a slave node. + */ +void clusterDelNode(clusterNode *delnode) { + int j; + dictIterator *di; + dictEntry *de; + + /* 1) Mark slots as unassigned. */ + for (j = 0; j < CLUSTER_SLOTS; j++) { + if (server.cluster->importing_slots_from[j] == delnode) + server.cluster->importing_slots_from[j] = NULL; + if (server.cluster->migrating_slots_to[j] == delnode) + server.cluster->migrating_slots_to[j] = NULL; + if (server.cluster->slots[j] == delnode) + clusterDelSlot(j); + } + + /* 2) Remove failure reports. 
*/ + di = dictGetSafeIterator(server.cluster->nodes); + while((de = dictNext(di)) != NULL) { + clusterNode *node = dictGetVal(de); + + if (node == delnode) continue; + clusterNodeDelFailureReport(node,delnode); + } + dictReleaseIterator(di); + + /* 3) Remove the node from the owning shard */ + clusterRemoveNodeFromShard(delnode); + + /* 4) Free the node, unlinking it from the cluster. */ + freeClusterNode(delnode); +} + +/* Node lookup by name */ +clusterNode *clusterLookupNode(const char *name, int length) { + if (verifyClusterNodeId(name, length) != C_OK) return NULL; + sds s = sdsnewlen(name, length); + dictEntry *de = dictFind(server.cluster->nodes, s); + sdsfree(s); + if (de == NULL) return NULL; + return dictGetVal(de); +} + +/* Get all the nodes in my shard. + * Note that the list returned is not computed on the fly + * via slaveof; rather, it is maintained permanently to + * track the shard membership and its life cycle is tied + * to this Redis process. Therefore, the caller must not + * release the list. */ +list *clusterGetNodesInMyShard(clusterNode *node) { + sds s = sdsnewlen(node->shard_id, CLUSTER_NAMELEN); + dictEntry *de = dictFind(server.cluster->shards,s); + sdsfree(s); + return (de != NULL) ? dictGetVal(de) : NULL; +} + +/* This is only used after the handshake. When we connect a given IP/PORT + * as a result of CLUSTER MEET we don't have the node name yet, so we + * pick a random one, and will fix it when we receive the PONG request using + * this function. 
*/ +void clusterRenameNode(clusterNode *node, char *newname) { + int retval; + sds s = sdsnewlen(node->name, CLUSTER_NAMELEN); + + serverLog(LL_DEBUG,"Renaming node %.40s into %.40s", + node->name, newname); + retval = dictDelete(server.cluster->nodes, s); + sdsfree(s); + serverAssert(retval == DICT_OK); + memcpy(node->name, newname, CLUSTER_NAMELEN); + clusterAddNode(node); + clusterAddNodeToShard(node->shard_id, node); +} + +void clusterAddNodeToShard(const char *shard_id, clusterNode *node) { + sds s = sdsnewlen(shard_id, CLUSTER_NAMELEN); + dictEntry *de = dictFind(server.cluster->shards,s); + if (de == NULL) { + list *l = listCreate(); + listAddNodeTail(l, node); + serverAssert(dictAdd(server.cluster->shards, s, l) == DICT_OK); + } else { + list *l = dictGetVal(de); + if (listSearchKey(l, node) == NULL) { + listAddNodeTail(l, node); + } + sdsfree(s); + } +} + +void clusterRemoveNodeFromShard(clusterNode *node) { + sds s = sdsnewlen(node->shard_id, CLUSTER_NAMELEN); + dictEntry *de = dictFind(server.cluster->shards, s); + if (de != NULL) { + list *l = dictGetVal(de); + listNode *ln = listSearchKey(l, node); + if (ln != NULL) { + listDelNode(l, ln); + } + if (listLength(l) == 0) { + dictDelete(server.cluster->shards, s); + } + } + sdsfree(s); +} + +/* ----------------------------------------------------------------------------- + * CLUSTER config epoch handling + * -------------------------------------------------------------------------- */ + +/* Return the greatest configEpoch found in the cluster, or the current + * epoch if greater than any node configEpoch. 
*/ +uint64_t clusterGetMaxEpoch(void) { + uint64_t max = 0; + dictIterator *di; + dictEntry *de; + + di = dictGetSafeIterator(server.cluster->nodes); + while((de = dictNext(di)) != NULL) { + clusterNode *node = dictGetVal(de); + if (node->configEpoch > max) max = node->configEpoch; + } + dictReleaseIterator(di); + if (max < server.cluster->currentEpoch) max = server.cluster->currentEpoch; + return max; +} + +/* If this node epoch is zero or is not already the greatest across the + * cluster (from the POV of the local configuration), this function will: + * + * 1) Generate a new config epoch, incrementing the current epoch. + * 2) Assign the new epoch to this node, WITHOUT any consensus. + * 3) Persist the configuration on disk before sending packets with the + * new configuration. + * + * If the new config epoch is generated and assigned, C_OK is returned, + * otherwise C_ERR is returned (since the node has already the greatest + * configuration around) and no operation is performed. + * + * Important note: this function violates the principle that config epochs + * should be generated with consensus and should be unique across the cluster. + * However Redis Cluster uses this auto-generated new config epochs in two + * cases: + * + * 1) When slots are closed after importing. Otherwise resharding would be + * too expensive. + * 2) When CLUSTER FAILOVER is called with options that force a slave to + * failover its master even if there is not master majority able to + * create a new configuration epoch. + * + * Redis Cluster will not explode using this function, even in the case of + * a collision between this node and another node, generating the same + * configuration epoch unilaterally, because the config epoch conflict + * resolution algorithm will eventually move colliding nodes to different + * config epochs. However using this function may violate the "last failover + * wins" rule, so should only be used with care. 
*/ +int clusterBumpConfigEpochWithoutConsensus(void) { + uint64_t maxEpoch = clusterGetMaxEpoch(); + + if (myself->configEpoch == 0 || + myself->configEpoch != maxEpoch) + { + server.cluster->currentEpoch++; + myself->configEpoch = server.cluster->currentEpoch; + clusterDoBeforeSleep(CLUSTER_TODO_SAVE_CONFIG| + CLUSTER_TODO_FSYNC_CONFIG); + serverLog(LL_NOTICE, + "New configEpoch set to %llu", + (unsigned long long) myself->configEpoch); + return C_OK; + } else { + return C_ERR; + } +} + +/* This function is called when this node is a master, and we receive from + * another master a configuration epoch that is equal to our configuration + * epoch. + * + * BACKGROUND + * + * It is not possible that different slaves get the same config + * epoch during a failover election, because the slaves need to get voted + * by a majority. However when we perform a manual resharding of the cluster + * the node will assign a configuration epoch to itself without asking + * for agreement. Usually resharding happens when the cluster is working well + * and is supervised by the sysadmin, however it is possible for a failover + * to happen exactly while the node we are resharding a slot to assigns itself + * a new configuration epoch, but before it is able to propagate it. + * + * So technically it is possible in this condition that two nodes end with + * the same configuration epoch. + * + * Another possibility is that there are bugs in the implementation causing + * this to happen. + * + * Moreover when a new cluster is created, all the nodes start with the same + * configEpoch. This collision resolution code allows nodes to automatically + * end with a different configEpoch at startup. + * + * In all the cases, we want a mechanism that resolves this issue automatically + * as a safeguard. 
The same configuration epoch for masters serving different + * set of slots is not harmful, but it is if the nodes end serving the same + * slots for some reason (manual errors or software bugs) without a proper + * failover procedure. + * + * In general we want a system that eventually always ends with different + * masters having different configuration epochs whatever happened, since + * nothing is worse than a split-brain condition in a distributed system. + * + * BEHAVIOR + * + * When this function gets called, what happens is that if this node + * has the lexicographically smaller Node ID compared to the other node + * with the conflicting epoch (the 'sender' node), it will assign itself + * the greatest configuration epoch currently detected among nodes plus 1. + * + * This means that even if there are multiple nodes colliding, the node + * with the greatest Node ID never moves forward, so eventually all the nodes + * end with a different configuration epoch. + */ +void clusterHandleConfigEpochCollision(clusterNode *sender) { + /* Prerequisites: nodes have the same configEpoch and are both masters. */ + if (sender->configEpoch != myself->configEpoch || + !clusterNodeIsMaster(sender) || !clusterNodeIsMaster(myself)) return; + /* Don't act if the colliding node has a smaller Node ID. */ + if (memcmp(sender->name,myself->name,CLUSTER_NAMELEN) <= 0) return; + /* Get the next ID available at the best of this node knowledge. */ + server.cluster->currentEpoch++; + myself->configEpoch = server.cluster->currentEpoch; + clusterSaveConfigOrDie(1); + serverLog(LL_VERBOSE, + "WARNING: configEpoch collision with node %.40s (%s)." 
+ " configEpoch set to %llu", + sender->name,sender->human_nodename, + (unsigned long long) myself->configEpoch); +} + +/* ----------------------------------------------------------------------------- + * CLUSTER nodes blacklist + * + * The nodes blacklist is just a way to ensure that a given node with a given + * Node ID is not re-added before some time elapsed (this time is specified + * in seconds in CLUSTER_BLACKLIST_TTL). + * + * This is useful when we want to remove a node from the cluster completely: + * when CLUSTER FORGET is called, it also puts the node into the blacklist so + * that even if we receive gossip messages from other nodes that still remember + * about the node we want to remove, we don't re-add it before some time. + * + * Currently the CLUSTER_BLACKLIST_TTL is set to 1 minute, this means + * that redis-cli has 60 seconds to send CLUSTER FORGET messages to nodes + * in the cluster without dealing with the problem of other nodes re-adding + * back the node to nodes we already sent the FORGET command to. + * + * The data structure used is a hash table with an sds string representing + * the node ID as key, and the time when it is ok to re-add the node as + * value. + * -------------------------------------------------------------------------- */ + +#define CLUSTER_BLACKLIST_TTL 60 /* 1 minute. */ + + +/* Before the addNode() or Exists() operations we always remove expired + * entries from the black list. This is an O(N) operation but it is not a + * problem since add / exists operations are called very infrequently and + * the hash table is supposed to contain very few elements at max. + * However without the cleanup during long uptime and with some automated + * node add/removal procedures, entries could accumulate. 
*/ +void clusterBlacklistCleanup(void) { + dictIterator *di; + dictEntry *de; + + di = dictGetSafeIterator(server.cluster->nodes_black_list); + while((de = dictNext(di)) != NULL) { + int64_t expire = dictGetUnsignedIntegerVal(de); + + if (expire < server.unixtime) + dictDelete(server.cluster->nodes_black_list,dictGetKey(de)); + } + dictReleaseIterator(di); +} + +/* Cleanup the blacklist and add a new node ID to the black list. */ +void clusterBlacklistAddNode(clusterNode *node) { + dictEntry *de; + sds id = sdsnewlen(node->name,CLUSTER_NAMELEN); + + clusterBlacklistCleanup(); + if (dictAdd(server.cluster->nodes_black_list,id,NULL) == DICT_OK) { + /* If the key was added, duplicate the sds string representation of + * the key for the next lookup. We'll free it at the end. */ + id = sdsdup(id); + } + de = dictFind(server.cluster->nodes_black_list,id); + dictSetUnsignedIntegerVal(de,time(NULL)+CLUSTER_BLACKLIST_TTL); + sdsfree(id); +} + +/* Return non-zero if the specified node ID exists in the blacklist. + * You don't need to pass an sds string here, any pointer to 40 bytes + * will work. */ +int clusterBlacklistExists(char *nodeid) { + sds id = sdsnewlen(nodeid,CLUSTER_NAMELEN); + int retval; + + clusterBlacklistCleanup(); + retval = dictFind(server.cluster->nodes_black_list,id) != NULL; + sdsfree(id); + return retval; +} + +/* ----------------------------------------------------------------------------- + * CLUSTER messages exchange - PING/PONG and gossip + * -------------------------------------------------------------------------- */ + +/* This function checks if a given node should be marked as FAIL. + * It happens if the following conditions are met: + * + * 1) We received enough failure reports from other master nodes via gossip. + * Enough means that the majority of the masters signaled the node is + * down recently. + * 2) We believe this node is in PFAIL state. 
+ * + * If a failure is detected we also inform the whole cluster about this + * event trying to force every other node to set the FAIL flag for the node. + * + * Note that the form of agreement used here is weak, as we collect the majority + * of masters state during some time, and even if we force agreement by + * propagating the FAIL message, because of partitions we may not reach every + * node. However: + * + * 1) Either we reach the majority and eventually the FAIL state will propagate + * to all the cluster. + * 2) Or there is no majority so no slave promotion will be authorized and the + * FAIL flag will be cleared after some time. + */ +void markNodeAsFailingIfNeeded(clusterNode *node) { + int failures; + int needed_quorum = (server.cluster->size / 2) + 1; + + if (!nodeTimedOut(node)) return; /* We can reach it. */ + if (nodeFailed(node)) return; /* Already FAILing. */ + + failures = clusterNodeFailureReportsCount(node); + /* Also count myself as a voter if I'm a master. */ + if (clusterNodeIsMaster(myself)) failures++; + if (failures < needed_quorum) return; /* No weak agreement from masters. */ + + serverLog(LL_NOTICE, + "Marking node %.40s (%s) as failing (quorum reached).", node->name, node->human_nodename); + + /* Mark the node as failing. */ + node->flags &= ~CLUSTER_NODE_PFAIL; + node->flags |= CLUSTER_NODE_FAIL; + node->fail_time = mstime(); + + /* Broadcast the failing node name to everybody, forcing all the other + * reachable nodes to flag the node as FAIL. + * We do that even if this node is a replica and not a master: anyway + * the failing state is triggered collecting failure reports from masters, + * so here the replica is only helping propagating this status. */ + clusterSendFail(node->name); + clusterDoBeforeSleep(CLUSTER_TODO_UPDATE_STATE|CLUSTER_TODO_SAVE_CONFIG); +} + +/* This function is called only if a node is marked as FAIL, but we are able + * to reach it again. It checks if there are the conditions to undo the FAIL + * state. 
*/ +void clearNodeFailureIfNeeded(clusterNode *node) { + mstime_t now = mstime(); + + serverAssert(nodeFailed(node)); + + /* For slaves we always clear the FAIL flag if we can contact the + * node again. */ + if (nodeIsSlave(node) || node->numslots == 0) { + serverLog(LL_NOTICE, + "Clear FAIL state for node %.40s (%s):%s is reachable again.", + node->name,node->human_nodename, + nodeIsSlave(node) ? "replica" : "master without slots"); + node->flags &= ~CLUSTER_NODE_FAIL; + clusterDoBeforeSleep(CLUSTER_TODO_UPDATE_STATE|CLUSTER_TODO_SAVE_CONFIG); + } + + /* If it is a master and... + * 1) The FAIL state is old enough. + * 2) It is yet serving slots from our point of view (not failed over). + * Apparently no one is going to fix these slots, clear the FAIL flag. */ + if (clusterNodeIsMaster(node) && node->numslots > 0 && + (now - node->fail_time) > + (server.cluster_node_timeout * CLUSTER_FAIL_UNDO_TIME_MULT)) + { + serverLog(LL_NOTICE, + "Clear FAIL state for node %.40s (%s): is reachable again and nobody is serving its slots after some time.", + node->name, node->human_nodename); + node->flags &= ~CLUSTER_NODE_FAIL; + clusterDoBeforeSleep(CLUSTER_TODO_UPDATE_STATE|CLUSTER_TODO_SAVE_CONFIG); + } +} + +/* Return true if we already have a node in HANDSHAKE state matching the + * specified ip address and port number. This function is used in order to + * avoid adding a new handshake node for the same address multiple times. */ +int clusterHandshakeInProgress(char *ip, int port, int cport) { + dictIterator *di; + dictEntry *de; + + di = dictGetSafeIterator(server.cluster->nodes); + while((de = dictNext(di)) != NULL) { + clusterNode *node = dictGetVal(de); + + if (!nodeInHandshake(node)) continue; + if (!strcasecmp(node->ip,ip) && + getNodeDefaultClientPort(node) == port && + node->cport == cport) break; + } + dictReleaseIterator(di); + return de != NULL; +} + +/* Start a handshake with the specified address if there is not one + * already in progress. 
Returns non-zero if the handshake was actually + * started. On error zero is returned and errno is set to one of the + * following values: + * + * EAGAIN - There is already a handshake in progress for this address. + * EINVAL - IP or port are not valid. */ +int clusterStartHandshake(char *ip, int port, int cport) { + clusterNode *n; + char norm_ip[NET_IP_STR_LEN]; + struct sockaddr_storage sa; + + /* IP sanity check */ + if (inet_pton(AF_INET,ip, + &(((struct sockaddr_in *)&sa)->sin_addr))) + { + sa.ss_family = AF_INET; + } else if (inet_pton(AF_INET6,ip, + &(((struct sockaddr_in6 *)&sa)->sin6_addr))) + { + sa.ss_family = AF_INET6; + } else { + errno = EINVAL; + return 0; + } + + /* Port sanity check */ + if (port <= 0 || port > 65535 || cport <= 0 || cport > 65535) { + errno = EINVAL; + return 0; + } + + /* Set norm_ip as the normalized string representation of the node + * IP address. */ + memset(norm_ip,0,NET_IP_STR_LEN); + if (sa.ss_family == AF_INET) + inet_ntop(AF_INET, + (void*)&(((struct sockaddr_in *)&sa)->sin_addr), + norm_ip,NET_IP_STR_LEN); + else + inet_ntop(AF_INET6, + (void*)&(((struct sockaddr_in6 *)&sa)->sin6_addr), + norm_ip,NET_IP_STR_LEN); + + if (clusterHandshakeInProgress(norm_ip,port,cport)) { + errno = EAGAIN; + return 0; + } + + /* Add the node with a random address (NULL as first argument to + * createClusterNode()). Everything will be fixed during the + * handshake. 
*/ + n = createClusterNode(NULL,CLUSTER_NODE_HANDSHAKE|CLUSTER_NODE_MEET); + memcpy(n->ip,norm_ip,sizeof(n->ip)); + if (server.tls_cluster) { + n->tls_port = port; + } else { + n->tcp_port = port; + } + n->cport = cport; + clusterAddNode(n); + return 1; +} + +static void getClientPortFromClusterMsg(clusterMsg *hdr, int *tls_port, int *tcp_port) { + if (server.tls_cluster) { + *tls_port = ntohs(hdr->port); + *tcp_port = ntohs(hdr->pport); + } else { + *tls_port = ntohs(hdr->pport); + *tcp_port = ntohs(hdr->port); + } +} + +static void getClientPortFromGossip(clusterMsgDataGossip *g, int *tls_port, int *tcp_port) { + if (server.tls_cluster) { + *tls_port = ntohs(g->port); + *tcp_port = ntohs(g->pport); + } else { + *tls_port = ntohs(g->pport); + *tcp_port = ntohs(g->port); + } +} + +/* Returns a string with the byte representation of the node ID (i.e. nodename) + * along with 8 trailing bytes for debugging purposes. */ +char *getCorruptedNodeIdByteString(clusterMsgDataGossip *gossip_msg) { + const int num_bytes = CLUSTER_NAMELEN + 8; + /* Allocate enough room for 4 chars per byte + null terminator */ + char *byte_string = (char*) zmalloc((num_bytes*4) + 1); + const char *name_ptr = gossip_msg->nodename; + + /* Ensure we won't print beyond the bounds of the message */ + serverAssert(name_ptr + num_bytes <= (char*)gossip_msg + sizeof(clusterMsgDataGossip)); + + for (int i = 0; i < num_bytes; i++) { + snprintf(byte_string + 4*i, 5, "\\x%02hhX", name_ptr[i]); + } + return byte_string; +} + +/* Returns the number of nodes in the gossip with invalid IDs. */ +int verifyGossipSectionNodeIds(clusterMsgDataGossip *g, uint16_t count) { + int invalid_ids = 0; + for (int i = 0; i < count; i++) { + const char *nodename = g[i].nodename; + if (verifyClusterNodeId(nodename, CLUSTER_NAMELEN) != C_OK) { + invalid_ids++; + char *raw_node_id = getCorruptedNodeIdByteString(g); + serverLog(LL_WARNING, + "Received gossip about a node with invalid ID %.40s. 
For debugging purposes, " + "the 48 bytes including the invalid ID and 8 trailing bytes are: %s", + nodename, raw_node_id); + zfree(raw_node_id); + } + } + return invalid_ids; +} + +/* Process the gossip section of PING or PONG packets. + * Note that this function assumes that the packet is already sanity-checked + * by the caller, not in the content of the gossip section, but in the + * length. */ +void clusterProcessGossipSection(clusterMsg *hdr, clusterLink *link) { + uint16_t count = ntohs(hdr->count); + clusterMsgDataGossip *g = (clusterMsgDataGossip*) hdr->data.ping.gossip; + clusterNode *sender = link->node ? link->node : clusterLookupNode(hdr->sender, CLUSTER_NAMELEN); + + /* Abort if the gossip contains invalid node IDs to avoid adding incorrect information to + * the nodes dictionary. An invalid ID indicates memory corruption on the sender side. */ + int invalid_ids = verifyGossipSectionNodeIds(g, count); + if (invalid_ids) { + if (sender) { + serverLog(LL_WARNING, "Node %.40s (%s) gossiped %d nodes with invalid IDs.", sender->name, sender->human_nodename, invalid_ids); + } else { + serverLog(LL_WARNING, "Unknown node gossiped %d nodes with invalid IDs.", invalid_ids); + } + return; + } + + while(count--) { + uint16_t flags = ntohs(g->flags); + clusterNode *node; + sds ci; + + if (server.verbosity == LL_DEBUG) { + ci = representClusterNodeFlags(sdsempty(), flags); + serverLog(LL_DEBUG,"GOSSIP %.40s %s:%d@%d %s", + g->nodename, + g->ip, + ntohs(g->port), + ntohs(g->cport), + ci); + sdsfree(ci); + } + + /* Convert port and pport into TCP port and TLS port. */ + int msg_tls_port, msg_tcp_port; + getClientPortFromGossip(g, &msg_tls_port, &msg_tcp_port); + + /* Update our state accordingly to the gossip sections */ + node = clusterLookupNode(g->nodename, CLUSTER_NAMELEN); + /* Ignore gossips about self. */ + if (node && node != myself) { + /* We already know this node. + Handle failure reports, only when the sender is a master. 
*/
            if (sender && clusterNodeIsMaster(sender)) {
                /* Failure reports are only accepted from masters: replicas do
                 * not participate in the failure-detection quorum. */
                if (flags & (CLUSTER_NODE_FAIL|CLUSTER_NODE_PFAIL)) {
                    if (clusterNodeAddFailureReport(node,sender)) {
                        serverLog(LL_VERBOSE,
                            "Node %.40s (%s) reported node %.40s (%s) as not reachable.",
                            sender->name, sender->human_nodename, node->name, node->human_nodename);
                    }
                    markNodeAsFailingIfNeeded(node);
                } else {
                    if (clusterNodeDelFailureReport(node,sender)) {
                        serverLog(LL_VERBOSE,
                            "Node %.40s (%s) reported node %.40s (%s) is back online.",
                            sender->name, sender->human_nodename, node->name, node->human_nodename);
                    }
                }
            }

            /* If from our POV the node is up (no failure flags are set),
             * we have no pending ping for the node, nor we have failure
             * reports for this node, update the last pong time with the
             * one we see from the other nodes. */
            if (!(flags & (CLUSTER_NODE_FAIL|CLUSTER_NODE_PFAIL)) &&
                node->ping_sent == 0 &&
                clusterNodeFailureReportsCount(node) == 0)
            {
                /* On the wire pong_received is in seconds (32 bit). */
                mstime_t pongtime = ntohl(g->pong_received);
                pongtime *= 1000; /* Convert back to milliseconds. */

                /* Replace the pong time with the received one only if
                 * it's greater than our view but is not in the future
                 * (with 500 milliseconds tolerance) from the POV of our
                 * clock. */
                if (pongtime <= (server.mstime+500) &&
                    pongtime > node->pong_received)
                {
                    node->pong_received = pongtime;
                }
            }

            /* If we already know this node, but it is not reachable, and
             * we see a different address in the gossip section of a node that
             * can talk with this other node, update the address, disconnect
             * the old link if any, so that we'll attempt to connect with the
             * new address. */
            if (node->flags & (CLUSTER_NODE_FAIL|CLUSTER_NODE_PFAIL) &&
                !(flags & CLUSTER_NODE_NOADDR) &&
                !(flags & (CLUSTER_NODE_FAIL|CLUSTER_NODE_PFAIL)) &&
                (strcasecmp(node->ip,g->ip) ||
                 node->tls_port != (server.tls_cluster ? ntohs(g->port) : ntohs(g->pport)) ||
                 node->tcp_port != (server.tls_cluster ? ntohs(g->pport) : ntohs(g->port)) ||
                 node->cport != ntohs(g->cport)))
            {
                if (node->link) freeClusterLink(node->link);
                memcpy(node->ip,g->ip,NET_IP_STR_LEN);
                node->tcp_port = msg_tcp_port;
                node->tls_port = msg_tls_port;
                node->cport = ntohs(g->cport);
                node->flags &= ~CLUSTER_NODE_NOADDR;
            }
        } else if (!node) {
            /* If it's not in NOADDR state and we don't have it, we
             * add it to our trusted dict with exact nodeid and flag.
             * Note that we cannot simply start a handshake against
             * this IP/PORT pairs, since IP/PORT can be reused already,
             * otherwise we risk joining another cluster.
             *
             * Note that we require that the sender of this gossip message
             * is a well known node in our cluster, otherwise we risk
             * joining another cluster. */
            if (sender &&
                !(flags & CLUSTER_NODE_NOADDR) &&
                !clusterBlacklistExists(g->nodename))
            {
                clusterNode *node;
                node = createClusterNode(g->nodename, flags);
                memcpy(node->ip,g->ip,NET_IP_STR_LEN);
                node->tcp_port = msg_tcp_port;
                node->tls_port = msg_tls_port;
                node->cport = ntohs(g->cport);
                clusterAddNode(node);
                clusterAddNodeToShard(node->shard_id, node);
            }
        }

        /* Next node */
        g++;
    }
}

/* IP -> string conversion. 'buf' is supposed to at least be 46 bytes.
 * If 'announced_ip' length is non-zero, it is used instead of extracting
 * the IP from the socket peer address.
 *
 * Returns C_OK on success, C_ERR if the peer address could not be
 * resolved from the link. */
int nodeIp2String(char *buf, clusterLink *link, char *announced_ip) {
    if (announced_ip[0] != '\0') {
        memcpy(buf,announced_ip,NET_IP_STR_LEN);
        buf[NET_IP_STR_LEN-1] = '\0'; /* We are not sure the input is sane. */
        return C_OK;
    } else {
        if (connAddrPeerName(link->conn, buf, NET_IP_STR_LEN, NULL) == -1) {
            serverLog(LL_NOTICE, "Error converting peer IP to string: %s",
                link->conn ?
connGetLastError(link->conn) : "no link");
            return C_ERR;
        }
        return C_OK;
    }
}

/* Update the node address to the IP address that can be extracted
 * from link->fd, or if hdr->myip is non empty, to the address the node
 * is announcing us. The port is taken from the packet header as well.
 *
 * If the address or port changed, disconnect the node link so that we'll
 * connect again to the new address.
 *
 * If the ip/port pair are already correct no operation is performed at
 * all.
 *
 * The function returns 0 if the node address is still the same,
 * otherwise 1 is returned. */
int nodeUpdateAddressIfNeeded(clusterNode *node, clusterLink *link,
                              clusterMsg *hdr)
{
    char ip[NET_IP_STR_LEN] = {0};
    int cport = ntohs(hdr->cport);
    int tcp_port, tls_port;
    getClientPortFromClusterMsg(hdr, &tls_port, &tcp_port);

    /* We don't proceed if the link is the same as the sender link, as this
     * function is designed to see if the node link is consistent with the
     * symmetric link that is used to receive PINGs from the node.
     *
     * As a side effect this function never frees the passed 'link', so
     * it is safe to call during packet processing. */
    if (link == node->link) return 0;

    /* If the peer IP is unavailable for some reasons like invalid fd or closed
     * link, just give up the update this time, and the update will be retried
     * in the next round of PINGs */
    if (nodeIp2String(ip,link,hdr->myip) == C_ERR) return 0;

    /* Nothing to do when every address component already matches. */
    if (node->tcp_port == tcp_port && node->cport == cport && node->tls_port == tls_port &&
        strcmp(ip,node->ip) == 0) return 0;

    /* IP / port is different, update it.
     */
    memcpy(node->ip,ip,sizeof(ip));
    node->tcp_port = tcp_port;
    node->tls_port = tls_port;
    node->cport = cport;
    /* Drop the stale link so the next clusterCron() reconnects to the
     * new address. */
    if (node->link) freeClusterLink(node->link);
    node->flags &= ~CLUSTER_NODE_NOADDR;
    serverLog(LL_NOTICE,"Address updated for node %.40s (%s), now %s:%d",
        node->name, node->human_nodename, node->ip, getNodeDefaultClientPort(node));

    /* Check if this is our master and we have to change the
     * replication target as well. */
    if (nodeIsSlave(myself) && myself->slaveof == node)
        replicationSetMaster(node->ip, getNodeDefaultReplicationPort(node));
    return 1;
}

/* Reconfigure the specified node 'n' as a master. This function is called when
 * a node that we believed to be a slave is now acting as master in order to
 * update the state of the node. */
void clusterSetNodeAsMaster(clusterNode *n) {
    if (clusterNodeIsMaster(n)) return;

    if (n->slaveof) {
        clusterNodeRemoveSlave(n->slaveof,n);
        /* The promoted node becomes a migration target, unless it is us. */
        if (n != myself) n->flags |= CLUSTER_NODE_MIGRATE_TO;
    }
    n->flags &= ~CLUSTER_NODE_SLAVE;
    n->flags |= CLUSTER_NODE_MASTER;
    n->slaveof = NULL;

    /* Update config and state. */
    clusterDoBeforeSleep(CLUSTER_TODO_SAVE_CONFIG|
                         CLUSTER_TODO_UPDATE_STATE);
}

/* This function is called when we receive a master configuration via a
 * PING, PONG or UPDATE packet. What we receive is a node, a configEpoch of the
 * node, and the set of slots claimed under this configEpoch.
 *
 * What we do is to rebind the slots with newer configuration compared to our
 * local configuration, and if needed, we turn ourself into a replica of the
 * node (see the function comments for more info).
 *
 * The 'sender' is the node for which we received a configuration update.
 * Sometimes it is not actually the "Sender" of the information, like in the
 * case we receive the info via an UPDATE packet.
*/
void clusterUpdateSlotsConfigWith(clusterNode *sender, uint64_t senderConfigEpoch, unsigned char *slots) {
    int j;
    clusterNode *curmaster = NULL, *newmaster = NULL;
    /* The dirty slots list is a list of slots for which we lose the ownership
     * while having still keys inside. This usually happens after a failover
     * or after a manual cluster reconfiguration operated by the admin.
     *
     * If the update message is not able to demote a master to slave (in this
     * case we'll resync with the master updating the whole key space), we
     * need to delete all the keys in the slots we lost ownership. */
    uint16_t dirty_slots[CLUSTER_SLOTS];
    int dirty_slots_count = 0;

    /* We should detect if sender is new master of our shard.
     * We will know it if all our slots were migrated to sender, and sender
     * has no slots except ours */
    int sender_slots = 0;
    int migrated_our_slots = 0;

    /* Here we set curmaster to this node or the node this node
     * replicates to if it's a slave. In the for loop we are
     * interested to check if slots are taken away from curmaster. */
    curmaster = clusterNodeIsMaster(myself) ? myself : myself->slaveof;

    if (sender == myself) {
        serverLog(LL_NOTICE,"Discarding UPDATE message about myself.");
        return;
    }

    for (j = 0; j < CLUSTER_SLOTS; j++) {
        if (bitmapTestBit(slots,j)) {
            sender_slots++;

            /* The slot is already bound to the sender of this message. */
            if (server.cluster->slots[j] == sender) {
                bitmapClearBit(server.cluster->owner_not_claiming_slot, j);
                continue;
            }

            /* The slot is in importing state, it should be modified only
             * manually via redis-cli (example: a resharding is in progress
             * and the migrating side slot was already closed and is advertising
             * a new config. We still want the slot to be closed manually).
             */
            if (server.cluster->importing_slots_from[j]) continue;

            /* We rebind the slot to the new node claiming it if:
             * 1) The slot was unassigned or the previous owner no longer owns the slot or
             *    the new node claims it with a greater configEpoch.
             * 2) We are not currently importing the slot. */
            if (isSlotUnclaimed(j) ||
                server.cluster->slots[j]->configEpoch < senderConfigEpoch)
            {
                /* Was this slot mine, and still contains keys? Mark it as
                 * a dirty slot. */
                if (server.cluster->slots[j] == myself &&
                    countKeysInSlot(j) &&
                    sender != myself)
                {
                    dirty_slots[dirty_slots_count] = j;
                    dirty_slots_count++;
                }

                /* Slot taken away from our (current or future) master:
                 * candidate for the failover-detection check below. */
                if (server.cluster->slots[j] == curmaster) {
                    newmaster = sender;
                    migrated_our_slots++;
                }
                clusterDelSlot(j);
                clusterAddSlot(sender,j);
                clusterDoBeforeSleep(CLUSTER_TODO_SAVE_CONFIG|
                                     CLUSTER_TODO_UPDATE_STATE|
                                     CLUSTER_TODO_FSYNC_CONFIG);
            }
        } else if (server.cluster->slots[j] == sender) {
            /* The slot is currently bound to the sender but the sender is no longer
             * claiming it. We don't want to unbind the slot yet as it can cause the cluster
             * to move to FAIL state and also throw client error. Keeping the slot bound to
             * the previous owner will cause a few client side redirects, but won't throw
             * any errors. We will keep track of the uncertainty in ownership to avoid
             * propagating misinformation about this slot's ownership using UPDATE
             * messages. */
            bitmapSetBit(server.cluster->owner_not_claiming_slot, j);
        }
    }

    /* After updating the slots configuration, don't do any actual change
     * in the state of the server if a module disabled Redis Cluster
     * keys redirections. */
    if (server.cluster_module_flags & CLUSTER_MODULE_FLAG_NO_REDIRECTION)
        return;

    /* If at least one slot was reassigned from a node to another node
     * with a greater configEpoch, it is possible that:
     * 1) We are a master left without slots. This means that we were
     *    failed over and we should turn into a replica of the new
     *    master.
     * 2) We are a slave and our master is left without slots. We need
     *    to replicate to the new slots owner. */
    if (newmaster && curmaster->numslots == 0 &&
        (server.cluster_allow_replica_migration ||
         sender_slots == migrated_our_slots)) {
        serverLog(LL_NOTICE,
            "Configuration change detected. Reconfiguring myself "
            "as a replica of %.40s (%s)", sender->name, sender->human_nodename);
        clusterSetMaster(sender);
        clusterDoBeforeSleep(CLUSTER_TODO_SAVE_CONFIG|
                             CLUSTER_TODO_UPDATE_STATE|
                             CLUSTER_TODO_FSYNC_CONFIG);
    } else if (myself->slaveof && myself->slaveof->slaveof &&
               /* In some rare case when CLUSTER FAILOVER TAKEOVER is used, it
                * can happen that myself is a replica of a replica of myself. If
                * this happens, we do nothing to avoid a crash and wait for the
                * admin to repair the cluster. */
               myself->slaveof->slaveof != myself)
    {
        /* Safeguard against sub-replicas. A replica's master can turn itself
         * into a replica if its last slot is removed. If no other node takes
         * over the slot, there is nothing else to trigger replica migration. */
        serverLog(LL_NOTICE,
            "I'm a sub-replica! Reconfiguring myself as a replica of grandmaster %.40s (%s)",
            myself->slaveof->slaveof->name, myself->slaveof->slaveof->human_nodename);
        clusterSetMaster(myself->slaveof->slaveof);
        clusterDoBeforeSleep(CLUSTER_TODO_SAVE_CONFIG|
                             CLUSTER_TODO_UPDATE_STATE|
                             CLUSTER_TODO_FSYNC_CONFIG);
    } else if (dirty_slots_count) {
        /* If we are here, we received an update message which removed
         * ownership for certain slots we still have keys about, but still
         * we are serving some slots, so this master node was not demoted to
         * a slave.
         *
         * In order to maintain a consistent state between keys and slots
         * we need to remove all the keys from the slots we lost. */
        for (j = 0; j < dirty_slots_count; j++)
            delKeysInSlot(dirty_slots[j]);
    }
}

/* Cluster ping extensions.
 *
 * The ping/pong/meet messages support arbitrary extensions to add additional
 * metadata to the messages that are sent between the various nodes in the
 * cluster. The extensions take the form:
 * [ Header length + type (8 bytes) ]
 * [ Extension information (Arbitrary length, but must be 8 byte padded) ]
 */


/* Returns the length of a given extension */
static uint32_t getPingExtLength(clusterMsgPingExt *ext) {
    return ntohl(ext->length);
}

/* Returns the initial position of ping extensions. May return an invalid
 * address if there are no ping extensions. */
static clusterMsgPingExt *getInitialPingExt(clusterMsg *hdr, int count) {
    /* Extensions are laid out immediately after the 'count' gossip entries. */
    clusterMsgPingExt *initial = (clusterMsgPingExt*) &(hdr->data.ping.gossip[count]);
    return initial;
}

/* Given a current ping extension, returns the start of the next extension. May return
 * an invalid address if there are no further ping extensions.
*/
static clusterMsgPingExt *getNextPingExt(clusterMsgPingExt *ext) {
    clusterMsgPingExt *next = (clusterMsgPingExt *) (((char *) ext) + getPingExtLength(ext));
    return next;
}

/* All PING extensions must be 8-byte aligned */
uint32_t getAlignedPingExtSize(uint32_t dataSize) {

    return sizeof(clusterMsgPingExt) + EIGHT_BYTE_ALIGN(dataSize);
}

/* Size of the hostname extension, or 0 when no hostname is configured.
 * The +1 accounts for the NUL terminator. */
uint32_t getHostnamePingExtSize(void) {
    if (sdslen(myself->hostname) == 0) {
        return 0;
    }
    return getAlignedPingExtSize(sdslen(myself->hostname) + 1);
}

/* Size of the human nodename extension, or 0 when none is configured. */
uint32_t getHumanNodenamePingExtSize(void) {
    if (sdslen(myself->human_nodename) == 0) {
        return 0;
    }
    return getAlignedPingExtSize(sdslen(myself->human_nodename) + 1);
}

/* Size of the (fixed-length) shard-id extension. */
uint32_t getShardIdPingExtSize(void) {
    return getAlignedPingExtSize(sizeof(clusterMsgPingExtShardId));
}

/* Size of the (fixed-length) forgotten-node extension. */
uint32_t getForgottenNodeExtSize(void) {
    return getAlignedPingExtSize(sizeof(clusterMsgPingExtForgottenNode));
}

/* Fill in the type/length header of an extension (in network byte order)
 * and return a pointer to its payload area. */
void *preparePingExt(clusterMsgPingExt *ext, uint16_t type, uint32_t length) {
    ext->type = htons(type);
    ext->length = htonl(length);
    return &ext->ext[0];
}

/* Advance to the next extension using the (already host-converted) length. */
clusterMsgPingExt *nextPingExt(clusterMsgPingExt *ext) {
    return (clusterMsgPingExt *)((char*)ext + ntohl(ext->length));
}

/* 1. If a NULL hdr is provided, compute the total extension size;
 * 2. If a non-NULL hdr is provided, write all applicable ping
 *    extensions (hostname, human nodename, forgotten nodes,
 *    shard id) starting at the extension area of 'hdr'. This
 *    function returns the number of bytes the extensions occupy,
 *    and when writing also sets hdr->extensions and the EXT_DATA
 *    flag.
 */
uint32_t writePingExt(clusterMsg *hdr, int gossipcount) {
    uint16_t extensions = 0;
    uint32_t totlen = 0;
    clusterMsgPingExt *cursor = NULL;
    /* Set the initial extension position */
    if (hdr != NULL) {
        cursor = getInitialPingExt(hdr, gossipcount);
    }

    /* hostname is optional */
    if (sdslen(myself->hostname) != 0) {
        if (cursor != NULL) {
            /* Populate hostname */
            clusterMsgPingExtHostname *ext = preparePingExt(cursor, CLUSTERMSG_EXT_TYPE_HOSTNAME, getHostnamePingExtSize());
            memcpy(ext->hostname, myself->hostname, sdslen(myself->hostname));

            /* Move the write cursor */
            cursor = nextPingExt(cursor);
        }

        totlen += getHostnamePingExtSize();
        extensions++;
    }

    if (sdslen(myself->human_nodename) != 0) {
        if (cursor != NULL) {
            /* Populate human_nodename */
            clusterMsgPingExtHumanNodename *ext = preparePingExt(cursor, CLUSTERMSG_EXT_TYPE_HUMAN_NODENAME, getHumanNodenamePingExtSize());
            memcpy(ext->human_nodename, myself->human_nodename, sdslen(myself->human_nodename));

            /* Move the write cursor */
            cursor = nextPingExt(cursor);
        }

        totlen += getHumanNodenamePingExtSize();
        extensions++;
    }

    /* Gossip forgotten nodes */
    if (dictSize(server.cluster->nodes_black_list) > 0) {
        dictIterator *di = dictGetIterator(server.cluster->nodes_black_list);
        dictEntry *de;
        while ((de = dictNext(di)) != NULL) {
            if (cursor != NULL) {
                /* NOTE: the expiry check runs only on the write pass
                 * (cursor != NULL); the sizing pass counts every blacklist
                 * entry, so the estimate may over-allocate for entries that
                 * expire in between, which is harmless since the returned
                 * totlen of the write pass is what ends up in the packet. */
                uint64_t expire = dictGetUnsignedIntegerVal(de);
                if ((time_t)expire < server.unixtime) continue; /* already expired */
                uint64_t ttl = expire - server.unixtime;
                clusterMsgPingExtForgottenNode *ext = preparePingExt(cursor, CLUSTERMSG_EXT_TYPE_FORGOTTEN_NODE, getForgottenNodeExtSize());
                memcpy(ext->name, dictGetKey(de), CLUSTER_NAMELEN);
                ext->ttl = htonu64(ttl);

                /* Move the write cursor */
                cursor = nextPingExt(cursor);
            }
            totlen += getForgottenNodeExtSize();
            extensions++;
        }
        dictReleaseIterator(di);
    }

    /* Populate shard_id */
    if (cursor != NULL) {
        clusterMsgPingExtShardId *ext = preparePingExt(cursor, CLUSTERMSG_EXT_TYPE_SHARDID, getShardIdPingExtSize());
        memcpy(ext->shard_id, myself->shard_id, CLUSTER_NAMELEN);

        /* Move the write cursor */
        cursor = nextPingExt(cursor);
    }
    totlen += getShardIdPingExtSize();
    extensions++;

    if (hdr != NULL) {
        if (extensions != 0) {
            hdr->mflags[0] |= CLUSTERMSG_FLAG0_EXT_DATA;
        }
        hdr->extensions = htons(extensions);
    }

    return totlen;
}

/* We previously validated the extensions, so this function just needs to
 * handle the extensions. */
void clusterProcessPingExtensions(clusterMsg *hdr, clusterLink *link) {
    clusterNode *sender = link->node ? link->node : clusterLookupNode(hdr->sender, CLUSTER_NAMELEN);
    char *ext_hostname = NULL;
    char *ext_humannodename = NULL;
    char *ext_shardid = NULL;
    uint16_t extensions = ntohs(hdr->extensions);
    /* Loop through all the extensions and process them */
    clusterMsgPingExt *ext = getInitialPingExt(hdr, ntohs(hdr->count));
    while (extensions--) {
        uint16_t type = ntohs(ext->type);
        if (type == CLUSTERMSG_EXT_TYPE_HOSTNAME) {
            clusterMsgPingExtHostname *hostname_ext = (clusterMsgPingExtHostname *) &(ext->ext[0].hostname);
            ext_hostname = hostname_ext->hostname;
        } else if (type == CLUSTERMSG_EXT_TYPE_HUMAN_NODENAME) {
            clusterMsgPingExtHumanNodename *humannodename_ext = (clusterMsgPingExtHumanNodename *) &(ext->ext[0].human_nodename);
            ext_humannodename = humannodename_ext->human_nodename;
        } else if (type == CLUSTERMSG_EXT_TYPE_FORGOTTEN_NODE) {
            clusterMsgPingExtForgottenNode *forgotten_node_ext = &(ext->ext[0].forgotten_node);
            clusterNode *n = clusterLookupNode(forgotten_node_ext->name, CLUSTER_NAMELEN);
            /* Never forget ourselves or our own master on behalf of a peer. */
            if (n && n != myself && !(nodeIsSlave(myself) && myself->slaveof == n)) {
                sds id = sdsnewlen(forgotten_node_ext->name, CLUSTER_NAMELEN);
                dictEntry *de = dictAddOrFind(server.cluster->nodes_black_list, id);
                uint64_t expire = server.unixtime + ntohu64(forgotten_node_ext->ttl);
dictSetUnsignedIntegerVal(de, expire);
                clusterDelNode(n);
                clusterDoBeforeSleep(CLUSTER_TODO_UPDATE_STATE|
                                     CLUSTER_TODO_SAVE_CONFIG);
            }
        } else if (type == CLUSTERMSG_EXT_TYPE_SHARDID) {
            clusterMsgPingExtShardId *shardid_ext = (clusterMsgPingExtShardId *) &(ext->ext[0].shard_id);
            ext_shardid = shardid_ext->shard_id;
        } else {
            /* Unknown type, we will ignore it but log what happened. */
            serverLog(LL_WARNING, "Received unknown extension type %d", type);
        }

        /* We know this will be valid since we validated it ahead of time */
        ext = getNextPingExt(ext);
    }

    /* If the node did not send us a hostname extension, assume
     * they don't have an announced hostname. Otherwise, we'll
     * set it now. */
    updateAnnouncedHostname(sender, ext_hostname);
    updateAnnouncedHumanNodename(sender, ext_humannodename);
    /* If the node did not send us a shard-id extension, it means the sender
     * does not support it (old version), node->shard_id is randomly generated.
     * A cluster-wide consensus for the node's shard_id is not necessary.
     * The key is maintaining consistency of the shard_id on each individual 7.2 node.
     * As the cluster progressively upgrades to version 7.2, we can expect the shard_ids
     * across all nodes to naturally converge and align.
     *
     * If sender is a replica, set the shard_id to the shard_id of its master.
     * Otherwise, we'll set it now. */
    if (ext_shardid == NULL) ext_shardid = clusterNodeGetMaster(sender)->shard_id;

    updateShardId(sender, ext_shardid);
}

/* Resolve the sending node for a message received on 'link': prefer the
 * node already associated with the link, falling back to a lookup by the
 * sender ID in the header. May return NULL for unknown senders. */
static clusterNode *getNodeFromLinkAndMsg(clusterLink *link, clusterMsg *hdr) {
    clusterNode *sender;
    if (link->node && !nodeInHandshake(link->node)) {
        /* If the link has an associated node, use that so that we don't have to look it
         * up every time, except when the node is still in handshake, the node still has
         * a random name thus not truly "known". */
        sender = link->node;
    } else {
        /* Otherwise, fetch sender based on the message */
        sender = clusterLookupNode(hdr->sender, CLUSTER_NAMELEN);
        /* We know the sender node but haven't associate it with the link. This must
         * be an inbound link because only for inbound links we didn't know which node
         * to associate when they were created. */
        if (sender && !link->node) {
            setClusterNodeToInboundClusterLink(sender, link);
        }
    }
    return sender;
}

/* When this function is called, there is a packet to process starting
 * at link->rcvbuf. Releasing the buffer is up to the caller, so this
 * function should just handle the higher level stuff of processing the
 * packet, modifying the cluster state if needed.
 *
 * The function returns 1 if the link is still valid after the packet
 * was processed, otherwise 0 if the link was freed since the packet
 * processing lead to some inconsistency error (for instance a PONG
 * received from the wrong sender ID). */
int clusterProcessPacket(clusterLink *link) {
    clusterMsg *hdr = (clusterMsg*) link->rcvbuf;
    uint32_t totlen = ntohl(hdr->totlen);
    uint16_t type = ntohs(hdr->type);
    mstime_t now = mstime();

    if (type < CLUSTERMSG_TYPE_COUNT)
        server.cluster->stats_bus_messages_received[type]++;
    serverLog(LL_DEBUG,"--- Processing packet of type %s, %lu bytes",
        clusterGetMessageTypeString(type), (unsigned long) totlen);

    /* Perform sanity checks */
    if (totlen < 16) return 1; /* At least signature, version, totlen, count. */
    if (totlen > link->rcvbuf_len) return 1;

    if (ntohs(hdr->ver) != CLUSTER_PROTO_VER) {
        /* Can't handle messages of different versions.
*/ + return 1; + } + + if (type == server.cluster_drop_packet_filter) { + serverLog(LL_WARNING, "Dropping packet that matches debug drop filter"); + return 1; + } + + uint16_t flags = ntohs(hdr->flags); + uint16_t extensions = ntohs(hdr->extensions); + uint64_t senderCurrentEpoch = 0, senderConfigEpoch = 0; + uint32_t explen; /* expected length of this packet */ + clusterNode *sender; + + if (type == CLUSTERMSG_TYPE_PING || type == CLUSTERMSG_TYPE_PONG || + type == CLUSTERMSG_TYPE_MEET) + { + uint16_t count = ntohs(hdr->count); + + explen = sizeof(clusterMsg)-sizeof(union clusterMsgData); + explen += (sizeof(clusterMsgDataGossip)*count); + + /* If there is extension data, which doesn't have a fixed length, + * loop through them and validate the length of it now. */ + if (hdr->mflags[0] & CLUSTERMSG_FLAG0_EXT_DATA) { + clusterMsgPingExt *ext = getInitialPingExt(hdr, count); + while (extensions--) { + uint16_t extlen = getPingExtLength(ext); + if (extlen % 8 != 0) { + serverLog(LL_WARNING, "Received a %s packet without proper padding (%d bytes)", + clusterGetMessageTypeString(type), (int) extlen); + return 1; + } + if ((totlen - explen) < extlen) { + serverLog(LL_WARNING, "Received invalid %s packet with extension data that exceeds " + "total packet length (%lld)", clusterGetMessageTypeString(type), + (unsigned long long) totlen); + return 1; + } + explen += extlen; + ext = getNextPingExt(ext); + } + } + } else if (type == CLUSTERMSG_TYPE_FAIL) { + explen = sizeof(clusterMsg)-sizeof(union clusterMsgData); + explen += sizeof(clusterMsgDataFail); + } else if (type == CLUSTERMSG_TYPE_PUBLISH || type == CLUSTERMSG_TYPE_PUBLISHSHARD) { + explen = sizeof(clusterMsg)-sizeof(union clusterMsgData); + explen += sizeof(clusterMsgDataPublish) - + 8 + + ntohl(hdr->data.publish.msg.channel_len) + + ntohl(hdr->data.publish.msg.message_len); + } else if (type == CLUSTERMSG_TYPE_FAILOVER_AUTH_REQUEST || + type == CLUSTERMSG_TYPE_FAILOVER_AUTH_ACK || + type == CLUSTERMSG_TYPE_MFSTART) 
+ { + explen = sizeof(clusterMsg)-sizeof(union clusterMsgData); + } else if (type == CLUSTERMSG_TYPE_UPDATE) { + explen = sizeof(clusterMsg)-sizeof(union clusterMsgData); + explen += sizeof(clusterMsgDataUpdate); + } else if (type == CLUSTERMSG_TYPE_MODULE) { + explen = sizeof(clusterMsg)-sizeof(union clusterMsgData); + explen += sizeof(clusterMsgModule) - + 3 + ntohl(hdr->data.module.msg.len); + } else { + /* We don't know this type of packet, so we assume it's well formed. */ + explen = totlen; + } + + if (totlen != explen) { + serverLog(LL_WARNING, "Received invalid %s packet of length %lld but expected length %lld", + clusterGetMessageTypeString(type), (unsigned long long) totlen, (unsigned long long) explen); + return 1; + } + + sender = getNodeFromLinkAndMsg(link, hdr); + + /* Update the last time we saw any data from this node. We + * use this in order to avoid detecting a timeout from a node that + * is just sending a lot of data in the cluster bus, for instance + * because of Pub/Sub. */ + if (sender) sender->data_received = now; + + if (sender && !nodeInHandshake(sender)) { + /* Update our currentEpoch if we see a newer epoch in the cluster. */ + senderCurrentEpoch = ntohu64(hdr->currentEpoch); + senderConfigEpoch = ntohu64(hdr->configEpoch); + if (senderCurrentEpoch > server.cluster->currentEpoch) + server.cluster->currentEpoch = senderCurrentEpoch; + /* Update the sender configEpoch if it is publishing a newer one. */ + if (senderConfigEpoch > sender->configEpoch) { + sender->configEpoch = senderConfigEpoch; + clusterDoBeforeSleep(CLUSTER_TODO_SAVE_CONFIG| + CLUSTER_TODO_FSYNC_CONFIG); + } + /* Update the replication offset info for this node. */ + sender->repl_offset = ntohu64(hdr->offset); + sender->repl_offset_time = now; + /* If we are a slave performing a manual failover and our master + * sent its offset while already paused, populate the MF state. 
*/ + if (server.cluster->mf_end && + nodeIsSlave(myself) && + myself->slaveof == sender && + hdr->mflags[0] & CLUSTERMSG_FLAG0_PAUSED && + server.cluster->mf_master_offset == -1) + { + server.cluster->mf_master_offset = sender->repl_offset; + clusterDoBeforeSleep(CLUSTER_TODO_HANDLE_MANUALFAILOVER); + serverLog(LL_NOTICE, + "Received replication offset for paused " + "master manual failover: %lld", + server.cluster->mf_master_offset); + } + } + + /* Initial processing of PING and MEET requests replying with a PONG. */ + if (type == CLUSTERMSG_TYPE_PING || type == CLUSTERMSG_TYPE_MEET) { + /* We use incoming MEET messages in order to set the address + * for 'myself', since only other cluster nodes will send us + * MEET messages on handshakes, when the cluster joins, or + * later if we changed address, and those nodes will use our + * official address to connect to us. So by obtaining this address + * from the socket is a simple way to discover / update our own + * address in the cluster without it being hardcoded in the config. + * + * However if we don't have an address at all, we update the address + * even with a normal PING packet. If it's wrong it will be fixed + * by MEET later. */ + if ((type == CLUSTERMSG_TYPE_MEET || myself->ip[0] == '\0') && + server.cluster_announce_ip == NULL) + { + char ip[NET_IP_STR_LEN]; + + if (connAddrSockName(link->conn,ip,sizeof(ip),NULL) != -1 && + strcmp(ip,myself->ip)) + { + memcpy(myself->ip,ip,NET_IP_STR_LEN); + serverLog(LL_NOTICE,"IP address for this node updated to %s", + myself->ip); + clusterDoBeforeSleep(CLUSTER_TODO_SAVE_CONFIG); + } + } + + /* Add this node if it is new for us and the msg type is MEET. + * In this stage we don't try to add the node with the right + * flags, slaveof pointer, and so forth, as this details will be + * resolved when we'll receive PONGs from the node. 
*/ + if (!sender && type == CLUSTERMSG_TYPE_MEET) { + clusterNode *node; + + node = createClusterNode(NULL,CLUSTER_NODE_HANDSHAKE); + serverAssert(nodeIp2String(node->ip,link,hdr->myip) == C_OK); + getClientPortFromClusterMsg(hdr, &node->tls_port, &node->tcp_port); + node->cport = ntohs(hdr->cport); + clusterAddNode(node); + clusterDoBeforeSleep(CLUSTER_TODO_SAVE_CONFIG); + } + + /* If this is a MEET packet from an unknown node, we still process + * the gossip section here since we have to trust the sender because + * of the message type. */ + if (!sender && type == CLUSTERMSG_TYPE_MEET) + clusterProcessGossipSection(hdr,link); + + /* Anyway reply with a PONG */ + clusterSendPing(link,CLUSTERMSG_TYPE_PONG); + } + + /* PING, PONG, MEET: process config information. */ + if (type == CLUSTERMSG_TYPE_PING || type == CLUSTERMSG_TYPE_PONG || + type == CLUSTERMSG_TYPE_MEET) + { + serverLog(LL_DEBUG,"%s packet received: %.40s", + clusterGetMessageTypeString(type), + link->node ? link->node->name : "NULL"); + if (!link->inbound) { + if (nodeInHandshake(link->node)) { + /* If we already have this node, try to change the + * IP/port of the node with the new one. */ + if (sender) { + serverLog(LL_VERBOSE, + "Handshake: we already know node %.40s (%s), " + "updating the address if needed.", sender->name, sender->human_nodename); + if (nodeUpdateAddressIfNeeded(sender,link,hdr)) + { + clusterDoBeforeSleep(CLUSTER_TODO_SAVE_CONFIG| + CLUSTER_TODO_UPDATE_STATE); + } + /* Free this node as we already have it. This will + * cause the link to be freed as well. */ + clusterDelNode(link->node); + return 0; + } + + /* First thing to do is replacing the random name with the + * right node name if this was a handshake stage. 
*/ + clusterRenameNode(link->node, hdr->sender); + serverLog(LL_DEBUG,"Handshake with node %.40s completed.", + link->node->name); + link->node->flags &= ~CLUSTER_NODE_HANDSHAKE; + link->node->flags |= flags&(CLUSTER_NODE_MASTER|CLUSTER_NODE_SLAVE); + clusterDoBeforeSleep(CLUSTER_TODO_SAVE_CONFIG); + } else if (memcmp(link->node->name,hdr->sender, + CLUSTER_NAMELEN) != 0) + { + /* If the reply has a non matching node ID we + * disconnect this node and set it as not having an associated + * address. */ + serverLog(LL_DEBUG,"PONG contains mismatching sender ID. About node %.40s added %d ms ago, having flags %d", + link->node->name, + (int)(now-(link->node->ctime)), + link->node->flags); + link->node->flags |= CLUSTER_NODE_NOADDR; + link->node->ip[0] = '\0'; + link->node->tcp_port = 0; + link->node->tls_port = 0; + link->node->cport = 0; + freeClusterLink(link); + clusterDoBeforeSleep(CLUSTER_TODO_SAVE_CONFIG); + return 0; + } + } + + /* Copy the CLUSTER_NODE_NOFAILOVER flag from what the sender + * announced. This is a dynamic flag that we receive from the + * sender, and the latest status must be trusted. We need it to + * be propagated because the slave ranking used to understand the + * delay of each slave in the voting process, needs to know + * what are the instances really competing. */ + if (sender) { + int nofailover = flags & CLUSTER_NODE_NOFAILOVER; + sender->flags &= ~CLUSTER_NODE_NOFAILOVER; + sender->flags |= nofailover; + } + + /* Update the node address if it changed. 
*/ + if (sender && type == CLUSTERMSG_TYPE_PING && + !nodeInHandshake(sender) && + nodeUpdateAddressIfNeeded(sender,link,hdr)) + { + clusterDoBeforeSleep(CLUSTER_TODO_SAVE_CONFIG| + CLUSTER_TODO_UPDATE_STATE); + } + + /* Update our info about the node */ + if (!link->inbound && type == CLUSTERMSG_TYPE_PONG) { + link->node->pong_received = now; + link->node->ping_sent = 0; + + /* The PFAIL condition can be reversed without external + * help if it is momentary (that is, if it does not + * turn into a FAIL state). + * + * The FAIL condition is also reversible under specific + * conditions detected by clearNodeFailureIfNeeded(). */ + if (nodeTimedOut(link->node)) { + link->node->flags &= ~CLUSTER_NODE_PFAIL; + clusterDoBeforeSleep(CLUSTER_TODO_SAVE_CONFIG| + CLUSTER_TODO_UPDATE_STATE); + } else if (nodeFailed(link->node)) { + clearNodeFailureIfNeeded(link->node); + } + } + + /* Check for role switch: slave -> master or master -> slave. */ + if (sender) { + if (!memcmp(hdr->slaveof,CLUSTER_NODE_NULL_NAME, + sizeof(hdr->slaveof))) + { + /* Node is a master. */ + clusterSetNodeAsMaster(sender); + } else { + /* Node is a slave. */ + clusterNode *master = clusterLookupNode(hdr->slaveof, CLUSTER_NAMELEN); + + if (clusterNodeIsMaster(sender)) { + /* Master turned into a slave! Reconfigure the node. */ + if (master && !memcmp(master->shard_id, sender->shard_id, CLUSTER_NAMELEN)) { + /* `sender` was a primary and was in the same shard as `master`, its new primary */ + if (sender->configEpoch > senderConfigEpoch) { + serverLog(LL_NOTICE, + "Ignore stale message from %.40s (%s) in shard %.40s;" + " gossip config epoch: %llu, current config epoch: %llu", + sender->name, + sender->human_nodename, + sender->shard_id, + (unsigned long long)senderConfigEpoch, + (unsigned long long)sender->configEpoch); + } else { + /* A failover occurred in the shard where `sender` belongs to and `sender` is no longer + * a primary. 
Update slot assignment to `master`, which is the new primary in the shard */ + int slots = clusterMoveNodeSlots(sender, master); + /* `master` is still a `slave` in this observer node's view; update its role and configEpoch */ + clusterSetNodeAsMaster(master); + master->configEpoch = senderConfigEpoch; + serverLog(LL_NOTICE, "A failover occurred in shard %.40s; node %.40s (%s)" + " lost %d slot(s) to node %.40s (%s) with a config epoch of %llu", + sender->shard_id, + sender->name, + sender->human_nodename, + slots, + master->name, + master->human_nodename, + (unsigned long long) master->configEpoch); + } + } else { + /* `sender` was moved to another shard and has become a replica, remove its slot assignment */ + int slots = clusterDelNodeSlots(sender); + serverLog(LL_NOTICE, "Node %.40s (%s) is no longer master of shard %.40s;" + " removed all %d slot(s) it used to own", + sender->name, + sender->human_nodename, + sender->shard_id, + slots); + if (master != NULL) { + serverLog(LL_NOTICE, "Node %.40s (%s) is now part of shard %.40s", + sender->name, + sender->human_nodename, + master->shard_id); + } + } + sender->flags &= ~(CLUSTER_NODE_MASTER| + CLUSTER_NODE_MIGRATE_TO); + sender->flags |= CLUSTER_NODE_SLAVE; + + /* Update config and state. */ + clusterDoBeforeSleep(CLUSTER_TODO_SAVE_CONFIG| + CLUSTER_TODO_UPDATE_STATE); + } + + /* Master node changed for this slave? */ + if (master && sender->slaveof != master) { + if (sender->slaveof) + clusterNodeRemoveSlave(sender->slaveof,sender); + clusterNodeAddSlave(master,sender); + sender->slaveof = master; + + /* Update the shard_id when a replica is connected to its + * primary in the very first time. */ + updateShardId(sender, master->shard_id); + + /* Update config. */ + clusterDoBeforeSleep(CLUSTER_TODO_SAVE_CONFIG); + } + } + } + + /* Update our info about served slots. + * + * Note: this MUST happen after we update the master/slave state + * so that CLUSTER_NODE_MASTER flag will be set. 
*/ + + /* Many checks are only needed if the set of served slots this + * instance claims is different compared to the set of slots we have + * for it. Check this ASAP to avoid other computationally expensive + * checks later. */ + clusterNode *sender_master = NULL; /* Sender or its master if slave. */ + int dirty_slots = 0; /* Sender claimed slots don't match my view? */ + + if (sender) { + sender_master = clusterNodeIsMaster(sender) ? sender : sender->slaveof; + if (sender_master) { + dirty_slots = memcmp(sender_master->slots, + hdr->myslots,sizeof(hdr->myslots)) != 0; + } + } + + /* 1) If the sender of the message is a master, and we detected that + * the set of slots it claims changed, scan the slots to see if we + * need to update our configuration. */ + if (sender && clusterNodeIsMaster(sender) && dirty_slots) + clusterUpdateSlotsConfigWith(sender,senderConfigEpoch,hdr->myslots); + + /* 2) We also check for the reverse condition, that is, the sender + * claims to serve slots we know are served by a master with a + * greater configEpoch. If this happens we inform the sender. + * + * This is useful because sometimes after a partition heals, a + * reappearing master may be the last one to claim a given set of + * hash slots, but with a configuration that other instances know to + * be deprecated. Example: + * + * A and B are master and slave for slots 1,2,3. + * A is partitioned away, B gets promoted. + * B is partitioned away, and A returns available. + * + * Usually B would PING A publishing its set of served slots and its + * configEpoch, but because of the partition B can't inform A of the + * new configuration, so other nodes that have an updated table must + * do it. In this way A will stop to act as a master (or can try to + * failover if there are the conditions to win the election).
*/ + if (sender && dirty_slots) { + int j; + + for (j = 0; j < CLUSTER_SLOTS; j++) { + if (bitmapTestBit(hdr->myslots,j)) { + if (server.cluster->slots[j] == sender || + isSlotUnclaimed(j)) continue; + if (server.cluster->slots[j]->configEpoch > + senderConfigEpoch) + { + serverLog(LL_VERBOSE, + "Node %.40s has old slots configuration, sending " + "an UPDATE message about %.40s", + sender->name, server.cluster->slots[j]->name); + clusterSendUpdate(sender->link, + server.cluster->slots[j]); + + /* TODO: instead of exiting the loop send every other + * UPDATE packet for other nodes that are the new owner + * of sender's slots. */ + break; + } + } + } + } + + /* If our config epoch collides with the sender's try to fix + * the problem. */ + if (sender && clusterNodeIsMaster(myself) && clusterNodeIsMaster(sender) && + senderConfigEpoch == myself->configEpoch) + { + clusterHandleConfigEpochCollision(sender); + } + + /* Get info from the gossip section */ + if (sender) { + clusterProcessGossipSection(hdr,link); + clusterProcessPingExtensions(hdr,link); + } + } else if (type == CLUSTERMSG_TYPE_FAIL) { + clusterNode *failing; + + if (sender) { + failing = clusterLookupNode(hdr->data.fail.about.nodename, CLUSTER_NAMELEN); + if (failing && + !(failing->flags & (CLUSTER_NODE_FAIL|CLUSTER_NODE_MYSELF))) + { + serverLog(LL_NOTICE, + "FAIL message received from %.40s (%s) about %.40s (%s)", + hdr->sender, sender->human_nodename, hdr->data.fail.about.nodename, failing->human_nodename); + failing->flags |= CLUSTER_NODE_FAIL; + failing->fail_time = now; + failing->flags &= ~CLUSTER_NODE_PFAIL; + clusterDoBeforeSleep(CLUSTER_TODO_SAVE_CONFIG| + CLUSTER_TODO_UPDATE_STATE); + } + } else { + serverLog(LL_NOTICE, + "Ignoring FAIL message from unknown node %.40s about %.40s", + hdr->sender, hdr->data.fail.about.nodename); + } + } else if (type == CLUSTERMSG_TYPE_PUBLISH || type == CLUSTERMSG_TYPE_PUBLISHSHARD) { + if (!sender) return 1; /* We don't know that node. 
*/ + + robj *channel, *message; + uint32_t channel_len, message_len; + + /* Don't bother creating useless objects if there are no + * Pub/Sub subscribers. */ + if ((type == CLUSTERMSG_TYPE_PUBLISH + && serverPubsubSubscriptionCount() > 0) + || (type == CLUSTERMSG_TYPE_PUBLISHSHARD + && serverPubsubShardSubscriptionCount() > 0)) + { + channel_len = ntohl(hdr->data.publish.msg.channel_len); + message_len = ntohl(hdr->data.publish.msg.message_len); + channel = createStringObject( + (char*)hdr->data.publish.msg.bulk_data,channel_len); + message = createStringObject( + (char*)hdr->data.publish.msg.bulk_data+channel_len, + message_len); + pubsubPublishMessage(channel, message, type == CLUSTERMSG_TYPE_PUBLISHSHARD); + decrRefCount(channel); + decrRefCount(message); + } + } else if (type == CLUSTERMSG_TYPE_FAILOVER_AUTH_REQUEST) { + if (!sender) return 1; /* We don't know that node. */ + clusterSendFailoverAuthIfNeeded(sender,hdr); + } else if (type == CLUSTERMSG_TYPE_FAILOVER_AUTH_ACK) { + if (!sender) return 1; /* We don't know that node. */ + /* We consider this vote only if the sender is a master serving + * a non zero number of slots, and its currentEpoch is greater or + * equal to epoch where this node started the election. */ + if (clusterNodeIsMaster(sender) && sender->numslots > 0 && + senderCurrentEpoch >= server.cluster->failover_auth_epoch) + { + server.cluster->failover_auth_count++; + /* Maybe we reached a quorum here, set a flag to make sure + * we check ASAP. */ + clusterDoBeforeSleep(CLUSTER_TODO_HANDLE_FAILOVER); + } + } else if (type == CLUSTERMSG_TYPE_MFSTART) { + /* This message is acceptable only if I'm a master and the sender + * is one of my slaves. */ + if (!sender || sender->slaveof != myself) return 1; + /* Manual failover requested from slaves. Initialize the state + * accordingly. 
*/ + resetManualFailover(); + server.cluster->mf_end = now + CLUSTER_MF_TIMEOUT; + server.cluster->mf_slave = sender; + pauseActions(PAUSE_DURING_FAILOVER, + now + (CLUSTER_MF_TIMEOUT * CLUSTER_MF_PAUSE_MULT), + PAUSE_ACTIONS_CLIENT_WRITE_SET); + serverLog(LL_NOTICE,"Manual failover requested by replica %.40s (%s).", + sender->name, sender->human_nodename); + /* We need to send a ping message to the replica, as it would carry + * `server.cluster->mf_master_offset`, which means the master paused clients + * at offset `server.cluster->mf_master_offset`, so that the replica would + * know that it is safe to set its `server.cluster->mf_can_start` to 1 so as + * to complete failover as quickly as possible. */ + clusterSendPing(link, CLUSTERMSG_TYPE_PING); + } else if (type == CLUSTERMSG_TYPE_UPDATE) { + clusterNode *n; /* The node the update is about. */ + uint64_t reportedConfigEpoch = + ntohu64(hdr->data.update.nodecfg.configEpoch); + + if (!sender) return 1; /* We don't know the sender. */ + n = clusterLookupNode(hdr->data.update.nodecfg.nodename, CLUSTER_NAMELEN); + if (!n) return 1; /* We don't know the reported node. */ + if (n->configEpoch >= reportedConfigEpoch) return 1; /* Nothing new. */ + + /* If in our current config the node is a slave, set it as a master. */ + if (nodeIsSlave(n)) clusterSetNodeAsMaster(n); + + /* Update the node's configEpoch. */ + n->configEpoch = reportedConfigEpoch; + clusterDoBeforeSleep(CLUSTER_TODO_SAVE_CONFIG| + CLUSTER_TODO_FSYNC_CONFIG); + + /* Check the bitmap of served slots and update our + * config accordingly. */ + clusterUpdateSlotsConfigWith(n,reportedConfigEpoch, + hdr->data.update.nodecfg.slots); + } else if (type == CLUSTERMSG_TYPE_MODULE) { + if (!sender) return 1; /* Protect the module from unknown nodes. */ + /* We need to route this message back to the right module subscribed + * for the right message type. 
*/ + uint64_t module_id = hdr->data.module.msg.module_id; /* Endian-safe ID */ + uint32_t len = ntohl(hdr->data.module.msg.len); + uint8_t type = hdr->data.module.msg.type; + unsigned char *payload = hdr->data.module.msg.bulk_data; + moduleCallClusterReceivers(sender->name,module_id,type,payload,len); + } else { + serverLog(LL_WARNING,"Received unknown packet type: %d", type); + } + return 1; +} + +/* This function is called when we detect the link with this node is lost. + We set the node as no longer connected. The Cluster Cron will detect + this connection and will try to get it connected again. + + Instead if the node is a temporary node used to accept a query, we + completely free the node on error. */ +void handleLinkIOError(clusterLink *link) { + freeClusterLink(link); +} + +/* Send the messages queued for the link. */ +void clusterWriteHandler(connection *conn) { + clusterLink *link = connGetPrivateData(conn); + ssize_t nwritten; + size_t totwritten = 0; + + while (totwritten < NET_MAX_WRITES_PER_EVENT && listLength(link->send_msg_queue) > 0) { + listNode *head = listFirst(link->send_msg_queue); + clusterMsgSendBlock *msgblock = (clusterMsgSendBlock*)head->value; + clusterMsg *msg = &msgblock->msg; + size_t msg_offset = link->head_msg_send_offset; + size_t msg_len = ntohl(msg->totlen); + + nwritten = connWrite(conn, (char*)msg + msg_offset, msg_len - msg_offset); + if (nwritten <= 0) { + serverLog(LL_DEBUG,"I/O error writing to node link: %s", + (nwritten == -1) ? 
connGetLastError(conn) : "short write"); + handleLinkIOError(link); + return; + } + if (msg_offset + nwritten < msg_len) { + /* If full message wasn't written, record the offset + * and continue sending from this point next time */ + link->head_msg_send_offset += nwritten; + return; + } + serverAssert((msg_offset + nwritten) == msg_len); + link->head_msg_send_offset = 0; + + /* Delete the node and update our memory tracking */ + uint32_t blocklen = msgblock->totlen; + listDelNode(link->send_msg_queue, head); + server.stat_cluster_links_memory -= sizeof(listNode); + link->send_msg_queue_mem -= sizeof(listNode) + blocklen; + + totwritten += nwritten; + } + + if (listLength(link->send_msg_queue) == 0) + connSetWriteHandler(link->conn, NULL); +} + +/* A connect handler that gets called when a connection to another node + * gets established. + */ +void clusterLinkConnectHandler(connection *conn) { + clusterLink *link = connGetPrivateData(conn); + clusterNode *node = link->node; + + /* Check if connection succeeded */ + if (connGetState(conn) != CONN_STATE_CONNECTED) { + serverLog(LL_VERBOSE, "Connection with Node %.40s at %s:%d failed: %s", + node->name, node->ip, node->cport, + connGetLastError(conn)); + freeClusterLink(link); + return; + } + + /* Register a read handler from now on */ + connSetReadHandler(conn, clusterReadHandler); + + /* Queue a PING in the new connection ASAP: this is crucial + * to avoid false positives in failure detection. + * + * If the node is flagged as MEET, we send a MEET message instead + * of a PING one, to force the receiver to add us in its node + * table. */ + mstime_t old_ping_sent = node->ping_sent; + clusterSendPing(link, node->flags & CLUSTER_NODE_MEET ? + CLUSTERMSG_TYPE_MEET : CLUSTERMSG_TYPE_PING); + if (old_ping_sent) { + /* If there was an active ping before the link was + * disconnected, we want to restore the ping time, otherwise + * replaced by the clusterSendPing() call. 
*/ + node->ping_sent = old_ping_sent; + } + /* We can clear the flag after the first packet is sent. + * If we'll never receive a PONG, we'll never send new packets + * to this node. Instead after the PONG is received and we + * are no longer in meet/handshake status, we want to send + * normal PING packets. */ + node->flags &= ~CLUSTER_NODE_MEET; + + serverLog(LL_DEBUG,"Connecting with Node %.40s at %s:%d", + node->name, node->ip, node->cport); +} + +/* Read data. Try to read the first field of the header first to check the + * full length of the packet. When a whole packet is in memory this function + * will call the function to process the packet. And so forth. */ +void clusterReadHandler(connection *conn) { + clusterMsg buf[1]; + ssize_t nread; + clusterMsg *hdr; + clusterLink *link = connGetPrivateData(conn); + unsigned int readlen, rcvbuflen; + + while(1) { /* Read as long as there is data to read. */ + rcvbuflen = link->rcvbuf_len; + if (rcvbuflen < 8) { + /* First, obtain the first 8 bytes to get the full message + * length. */ + readlen = 8 - rcvbuflen; + } else { + /* Finally read the full message. */ + hdr = (clusterMsg*) link->rcvbuf; + if (rcvbuflen == 8) { + /* Perform some sanity check on the message signature + * and length. */ + if (memcmp(hdr->sig,"RCmb",4) != 0 || + ntohl(hdr->totlen) < CLUSTERMSG_MIN_LEN) + { + char ip[NET_IP_STR_LEN]; + int port; + if (connAddrPeerName(conn, ip, sizeof(ip), &port) == -1) { + serverLog(LL_WARNING, + "Bad message length or signature received " + "on the Cluster bus."); + } else { + serverLog(LL_WARNING, + "Bad message length or signature received " + "on the Cluster bus from %s:%d", ip, port); + } + handleLinkIOError(link); + return; + } + } + readlen = ntohl(hdr->totlen) - rcvbuflen; + if (readlen > sizeof(buf)) readlen = sizeof(buf); + } + + nread = connRead(conn,buf,readlen); + if (nread == -1 && (connGetState(conn) == CONN_STATE_CONNECTED)) return; /* No more data ready. 
*/ + + if (nread <= 0) { + /* I/O error... */ + serverLog(LL_DEBUG,"I/O error reading from node link: %s", + (nread == 0) ? "connection closed" : connGetLastError(conn)); + handleLinkIOError(link); + return; + } else { + /* Read data and recast the pointer to the new buffer. */ + size_t unused = link->rcvbuf_alloc - link->rcvbuf_len; + if ((size_t)nread > unused) { + size_t required = link->rcvbuf_len + nread; + size_t prev_rcvbuf_alloc = link->rcvbuf_alloc; + /* If less than 1mb, grow to twice the needed size, if larger grow by 1mb. */ + link->rcvbuf_alloc = required < RCVBUF_MAX_PREALLOC ? required * 2: required + RCVBUF_MAX_PREALLOC; + link->rcvbuf = zrealloc(link->rcvbuf, link->rcvbuf_alloc); + server.stat_cluster_links_memory += link->rcvbuf_alloc - prev_rcvbuf_alloc; + } + memcpy(link->rcvbuf + link->rcvbuf_len, buf, nread); + link->rcvbuf_len += nread; + hdr = (clusterMsg*) link->rcvbuf; + rcvbuflen += nread; + } + + /* Total length obtained? Process this packet. */ + if (rcvbuflen >= 8 && rcvbuflen == ntohl(hdr->totlen)) { + if (clusterProcessPacket(link)) { + if (link->rcvbuf_alloc > RCVBUF_INIT_LEN) { + size_t prev_rcvbuf_alloc = link->rcvbuf_alloc; + zfree(link->rcvbuf); + link->rcvbuf = zmalloc(link->rcvbuf_alloc = RCVBUF_INIT_LEN); + server.stat_cluster_links_memory += link->rcvbuf_alloc - prev_rcvbuf_alloc; + } + link->rcvbuf_len = 0; + } else { + return; /* Link no longer valid. */ + } + } + } +} + +/* Put the message block into the link's send queue. + * + * It is guaranteed that this function will never have as a side effect + * the link to be invalidated, so it is safe to call this function + * from event handlers that will do stuff with the same link later. 
*/ +void clusterSendMessage(clusterLink *link, clusterMsgSendBlock *msgblock) { + if (!link) { + return; + } + if (listLength(link->send_msg_queue) == 0 && msgblock->msg.totlen != 0) + connSetWriteHandlerWithBarrier(link->conn, clusterWriteHandler, 1); + + listAddNodeTail(link->send_msg_queue, msgblock); + msgblock->refcount++; + + /* Update memory tracking */ + link->send_msg_queue_mem += sizeof(listNode) + msgblock->totlen; + server.stat_cluster_links_memory += sizeof(listNode); + + /* Populate sent messages stats. */ + uint16_t type = ntohs(msgblock->msg.type); + if (type < CLUSTERMSG_TYPE_COUNT) + server.cluster->stats_bus_messages_sent[type]++; +} + +/* Send a message to all the nodes that are part of the cluster having + * a connected link. + * + * It is guaranteed that this function will never have as a side effect + * some node->link to be invalidated, so it is safe to call this function + * from event handlers that will do stuff with node links later. */ +void clusterBroadcastMessage(clusterMsgSendBlock *msgblock) { + dictIterator *di; + dictEntry *de; + + di = dictGetSafeIterator(server.cluster->nodes); + while((de = dictNext(di)) != NULL) { + clusterNode *node = dictGetVal(de); + + if (node->flags & (CLUSTER_NODE_MYSELF|CLUSTER_NODE_HANDSHAKE)) + continue; + clusterSendMessage(node->link,msgblock); + } + dictReleaseIterator(di); +} + +/* Build the message header. hdr must point to a buffer at least + * sizeof(clusterMsg) in bytes. */ +static void clusterBuildMessageHdr(clusterMsg *hdr, int type, size_t msglen) { + uint64_t offset; + clusterNode *master; + + /* If this node is a master, we send its slots bitmap and configEpoch. + * If this node is a slave we send the master's information instead (the + * node is flagged as slave so the receiver knows that it is NOT really + * in charge for this slots. */ + master = (nodeIsSlave(myself) && myself->slaveof) ? 
+ myself->slaveof : myself; + + hdr->ver = htons(CLUSTER_PROTO_VER); + hdr->sig[0] = 'R'; + hdr->sig[1] = 'C'; + hdr->sig[2] = 'm'; + hdr->sig[3] = 'b'; + hdr->type = htons(type); + memcpy(hdr->sender,myself->name,CLUSTER_NAMELEN); + + /* If cluster-announce-ip option is enabled, force the receivers of our + * packets to use the specified address for this node. Otherwise if the + * first byte is zero, they'll do auto discovery. */ + memset(hdr->myip,0,NET_IP_STR_LEN); + if (server.cluster_announce_ip) { + redis_strlcpy(hdr->myip,server.cluster_announce_ip,NET_IP_STR_LEN); + } + + /* Handle cluster-announce-[tls-|bus-]port. */ + int announced_tcp_port, announced_tls_port, announced_cport; + deriveAnnouncedPorts(&announced_tcp_port, &announced_tls_port, &announced_cport); + + memcpy(hdr->myslots,master->slots,sizeof(hdr->myslots)); + memset(hdr->slaveof,0,CLUSTER_NAMELEN); + if (myself->slaveof != NULL) + memcpy(hdr->slaveof,myself->slaveof->name, CLUSTER_NAMELEN); + if (server.tls_cluster) { + hdr->port = htons(announced_tls_port); + hdr->pport = htons(announced_tcp_port); + } else { + hdr->port = htons(announced_tcp_port); + hdr->pport = htons(announced_tls_port); + } + hdr->cport = htons(announced_cport); + hdr->flags = htons(myself->flags); + hdr->state = server.cluster->state; + + /* Set the currentEpoch and configEpochs. */ + hdr->currentEpoch = htonu64(server.cluster->currentEpoch); + hdr->configEpoch = htonu64(master->configEpoch); + + /* Set the replication offset. */ + if (nodeIsSlave(myself)) + offset = replicationGetSlaveOffset(); + else + offset = server.master_repl_offset; + hdr->offset = htonu64(offset); + + /* Set the message flags. */ + if (clusterNodeIsMaster(myself) && server.cluster->mf_end) + hdr->mflags[0] |= CLUSTERMSG_FLAG0_PAUSED; + + hdr->totlen = htonl(msglen); +} + +/* Set the i-th entry of the gossip section in the message pointed by 'hdr' + * to the info of the specified node 'n'. 
*/ +void clusterSetGossipEntry(clusterMsg *hdr, int i, clusterNode *n) { + clusterMsgDataGossip *gossip; + gossip = &(hdr->data.ping.gossip[i]); + memcpy(gossip->nodename,n->name,CLUSTER_NAMELEN); + gossip->ping_sent = htonl(n->ping_sent/1000); + gossip->pong_received = htonl(n->pong_received/1000); + memcpy(gossip->ip,n->ip,sizeof(n->ip)); + if (server.tls_cluster) { + gossip->port = htons(n->tls_port); + gossip->pport = htons(n->tcp_port); + } else { + gossip->port = htons(n->tcp_port); + gossip->pport = htons(n->tls_port); + } + gossip->cport = htons(n->cport); + gossip->flags = htons(n->flags); + gossip->notused1 = 0; +} + +/* Send a PING or PONG packet to the specified node, making sure to add enough + * gossip information. */ +void clusterSendPing(clusterLink *link, int type) { + static unsigned long long cluster_pings_sent = 0; + cluster_pings_sent++; + int gossipcount = 0; /* Number of gossip sections added so far. */ + int wanted; /* Number of gossip sections we want to append if possible. */ + int estlen; /* Upper bound on estimated packet length */ + /* freshnodes is the max number of nodes we can hope to append at all: + * nodes available minus two (ourself and the node we are sending the + * message to). However practically there may be less valid nodes since + * nodes in handshake state, disconnected, are not considered. */ + int freshnodes = dictSize(server.cluster->nodes)-2; + + /* How many gossip sections we want to add? 1/10 of the number of nodes + * and anyway at least 3. Why 1/10? + * + * If we have N masters, with N/10 entries, and we consider that in + * node_timeout we exchange with each other node at least 4 packets + * (we ping in the worst case in node_timeout/2 time, and we also + * receive two pings from the host), we have a total of 8 packets + * in the node_timeout*2 failure reports validity time. 
So we have + * that, for a single PFAIL node, we can expect to receive the following + * number of failure reports (in the specified window of time): + * + * PROB * GOSSIP_ENTRIES_PER_PACKET * TOTAL_PACKETS: + * + * PROB = probability of being featured in a single gossip entry, + * which is 1 / NUM_OF_NODES. + * ENTRIES = 10. + * TOTAL_PACKETS = 2 * 4 * NUM_OF_MASTERS. + * + * If we assume we have just masters (so num of nodes and num of masters + * is the same), with 1/10 we always get over the majority, and specifically + * 80% of the number of nodes, to account for many masters failing at the + * same time. + * + * Since we have non-voting slaves that lower the probability of an entry + * to feature our node, we set the number of entries per packet as + * 10% of the total nodes we have. */ + wanted = floor(dictSize(server.cluster->nodes)/10); + if (wanted < 3) wanted = 3; + if (wanted > freshnodes) wanted = freshnodes; + + /* Include all the nodes in PFAIL state, so that failure reports are + * faster to propagate to go from PFAIL to FAIL state. */ + int pfail_wanted = server.cluster->stats_pfail_nodes; + + /* Compute the maximum estlen to allocate our buffer. We'll fix the estlen + * later according to the number of gossip sections we really were able + * to put inside the packet. */ + estlen = sizeof(clusterMsg) - sizeof(union clusterMsgData); + estlen += (sizeof(clusterMsgDataGossip)*(wanted + pfail_wanted)); + estlen += writePingExt(NULL, 0); + /* Note: clusterBuildMessageHdr() expects the buffer to be always at least + * sizeof(clusterMsg) or more. 
*/ + if (estlen < (int)sizeof(clusterMsg)) estlen = sizeof(clusterMsg); + clusterMsgSendBlock *msgblock = createClusterMsgSendBlock(type, estlen); + clusterMsg *hdr = &msgblock->msg; + + if (!link->inbound && type == CLUSTERMSG_TYPE_PING) + link->node->ping_sent = mstime(); + + /* Populate the gossip fields */ + int maxiterations = wanted*3; + while(freshnodes > 0 && gossipcount < wanted && maxiterations--) { + dictEntry *de = dictGetRandomKey(server.cluster->nodes); + clusterNode *this = dictGetVal(de); + + /* Don't include this node: the whole packet header is about us + * already, so we just gossip about other nodes. + * Also, don't include the receiver. Receiver will not update its state + * based on gossips about itself. */ + if (this == myself || this == link->node) continue; + + /* PFAIL nodes will be added later. */ + if (this->flags & CLUSTER_NODE_PFAIL) continue; + + /* In the gossip section don't include: + * 1) Nodes in HANDSHAKE state. + * 2) Nodes with the NOADDR flag set. + * 3) Disconnected nodes if they don't have configured slots. + */ + if (this->flags & (CLUSTER_NODE_HANDSHAKE|CLUSTER_NODE_NOADDR) || + (this->link == NULL && this->numslots == 0)) + { + freshnodes--; /* Technically not correct, but saves CPU. */ + continue; + } + + /* Do not add a node we already have. */ + if (this->last_in_ping_gossip == cluster_pings_sent) continue; + + /* Add it */ + clusterSetGossipEntry(hdr,gossipcount,this); + this->last_in_ping_gossip = cluster_pings_sent; + freshnodes--; + gossipcount++; + } + + /* If there are PFAIL nodes, add them at the end.
*/ + if (pfail_wanted) { + dictIterator *di; + dictEntry *de; + + di = dictGetSafeIterator(server.cluster->nodes); + while((de = dictNext(di)) != NULL && pfail_wanted > 0) { + clusterNode *node = dictGetVal(de); + if (node->flags & CLUSTER_NODE_HANDSHAKE) continue; + if (node->flags & CLUSTER_NODE_NOADDR) continue; + if (!(node->flags & CLUSTER_NODE_PFAIL)) continue; + clusterSetGossipEntry(hdr,gossipcount,node); + gossipcount++; + /* We take the count of the slots we allocated, since the + * PFAIL stats may not match perfectly with the current number + * of PFAIL nodes. */ + pfail_wanted--; + } + dictReleaseIterator(di); + } + + /* Compute the actual total length and send! */ + uint32_t totlen = 0; + totlen += writePingExt(hdr, gossipcount); + totlen += sizeof(clusterMsg)-sizeof(union clusterMsgData); + totlen += (sizeof(clusterMsgDataGossip)*gossipcount); + serverAssert(gossipcount < USHRT_MAX); + hdr->count = htons(gossipcount); + hdr->totlen = htonl(totlen); + + clusterSendMessage(link,msgblock); + clusterMsgSendBlockDecrRefCount(msgblock); +} + +/* Send a PONG packet to every connected node that's not in handshake state + * and for which we have a valid link. + * + * In Redis Cluster pongs are not used just for failure detection, but also + * to carry important configuration information. So broadcasting a pong is + * useful when something changes in the configuration and we want to make + * the cluster aware ASAP (for instance after a slave promotion). + * + * The 'target' argument specifies the receiving instances using the + * defines below: + * + * CLUSTER_BROADCAST_ALL -> All known instances. + * CLUSTER_BROADCAST_LOCAL_SLAVES -> All slaves in my master-slaves ring. 
+ */ +#define CLUSTER_BROADCAST_ALL 0 +#define CLUSTER_BROADCAST_LOCAL_SLAVES 1 +void clusterBroadcastPong(int target) { + dictIterator *di; + dictEntry *de; + + di = dictGetSafeIterator(server.cluster->nodes); + while((de = dictNext(di)) != NULL) { + clusterNode *node = dictGetVal(de); + + if (!node->link) continue; + if (node == myself || nodeInHandshake(node)) continue; + if (target == CLUSTER_BROADCAST_LOCAL_SLAVES) { + int local_slave = + nodeIsSlave(node) && node->slaveof && + (node->slaveof == myself || node->slaveof == myself->slaveof); + if (!local_slave) continue; + } + clusterSendPing(node->link,CLUSTERMSG_TYPE_PONG); + } + dictReleaseIterator(di); +} + +/* Create a PUBLISH message block. + * + * Sanitizer suppression: In clusterMsgDataPublish, sizeof(bulk_data) is 8. + * As all the struct is used as a buffer, when more than 8 bytes are copied into + * the 'bulk_data', sanitizer generates an out-of-bounds error which is a false + * positive in this context. */ +REDIS_NO_SANITIZE("bounds") +clusterMsgSendBlock *clusterCreatePublishMsgBlock(robj *channel, robj *message, uint16_t type) { + + uint32_t channel_len, message_len; + + channel = getDecodedObject(channel); + message = getDecodedObject(message); + channel_len = sdslen(channel->ptr); + message_len = sdslen(message->ptr); + + size_t msglen = sizeof(clusterMsg)-sizeof(union clusterMsgData); + msglen += sizeof(clusterMsgDataPublish) - 8 + channel_len + message_len; + clusterMsgSendBlock *msgblock = createClusterMsgSendBlock(type, msglen); + + clusterMsg *hdr = &msgblock->msg; + hdr->data.publish.msg.channel_len = htonl(channel_len); + hdr->data.publish.msg.message_len = htonl(message_len); + memcpy(hdr->data.publish.msg.bulk_data,channel->ptr,sdslen(channel->ptr)); + memcpy(hdr->data.publish.msg.bulk_data+sdslen(channel->ptr), + message->ptr,sdslen(message->ptr)); + + decrRefCount(channel); + decrRefCount(message); + + return msgblock; +} + +/* Send a FAIL message to all the nodes we are able to 
contact. + * The FAIL message is sent when we detect that a node is failing + * (CLUSTER_NODE_PFAIL) and we also receive a gossip confirmation of this: + * we switch the node state to CLUSTER_NODE_FAIL and ask all the other + * nodes to do the same ASAP. */ +void clusterSendFail(char *nodename) { + uint32_t msglen = sizeof(clusterMsg) - sizeof(union clusterMsgData) + + sizeof(clusterMsgDataFail); + clusterMsgSendBlock *msgblock = createClusterMsgSendBlock(CLUSTERMSG_TYPE_FAIL, msglen); + + clusterMsg *hdr = &msgblock->msg; + memcpy(hdr->data.fail.about.nodename,nodename,CLUSTER_NAMELEN); + + clusterBroadcastMessage(msgblock); + clusterMsgSendBlockDecrRefCount(msgblock); +} + +/* Send an UPDATE message to the specified link carrying the specified 'node' + * slots configuration. The node name, slots bitmap, and configEpoch info + * are included. */ +void clusterSendUpdate(clusterLink *link, clusterNode *node) { + if (link == NULL) return; + + uint32_t msglen = sizeof(clusterMsg) - sizeof(union clusterMsgData) + + sizeof(clusterMsgDataUpdate); + clusterMsgSendBlock *msgblock = createClusterMsgSendBlock(CLUSTERMSG_TYPE_UPDATE, msglen); + + clusterMsg *hdr = &msgblock->msg; + memcpy(hdr->data.update.nodecfg.nodename,node->name,CLUSTER_NAMELEN); + hdr->data.update.nodecfg.configEpoch = htonu64(node->configEpoch); + memcpy(hdr->data.update.nodecfg.slots,node->slots,sizeof(node->slots)); + for (unsigned int i = 0; i < sizeof(node->slots); i++) { + /* Don't advertise slots that the node stopped claiming */ + hdr->data.update.nodecfg.slots[i] = hdr->data.update.nodecfg.slots[i] & (~server.cluster->owner_not_claiming_slot[i]); + } + + clusterSendMessage(link,msgblock); + clusterMsgSendBlockDecrRefCount(msgblock); +} + +/* Send a MODULE message. + * + * If link is NULL, then the message is broadcasted to the whole cluster. 
*/ +void clusterSendModule(clusterLink *link, uint64_t module_id, uint8_t type, + const char *payload, uint32_t len) { + uint32_t msglen = sizeof(clusterMsg)-sizeof(union clusterMsgData); + msglen += sizeof(clusterMsgModule) - 3 + len; + clusterMsgSendBlock *msgblock = createClusterMsgSendBlock(CLUSTERMSG_TYPE_MODULE, msglen); + + clusterMsg *hdr = &msgblock->msg; + hdr->data.module.msg.module_id = module_id; /* Already endian adjusted. */ + hdr->data.module.msg.type = type; + hdr->data.module.msg.len = htonl(len); + memcpy(hdr->data.module.msg.bulk_data,payload,len); + + if (link) + clusterSendMessage(link,msgblock); + else + clusterBroadcastMessage(msgblock); + + clusterMsgSendBlockDecrRefCount(msgblock); +} + +/* This function gets a cluster node ID string as target, the same way the nodes + * addresses are represented in the modules side, resolves the node, and sends + * the message. If the target is NULL the message is broadcasted. + * + * The function returns C_OK if the target is valid, otherwise C_ERR is + * returned. */ +int clusterSendModuleMessageToTarget(const char *target, uint64_t module_id, uint8_t type, const char *payload, uint32_t len) { + clusterNode *node = NULL; + + if (target != NULL) { + node = clusterLookupNode(target, strlen(target)); + if (node == NULL || node->link == NULL) return C_ERR; + } + + clusterSendModule(target ? node->link : NULL, + module_id, type, payload, len); + return C_OK; +} + +/* ----------------------------------------------------------------------------- + * CLUSTER Pub/Sub support + * + * If `sharded` is 0: + * For now we do very little, just propagating [S]PUBLISH messages across the whole + * cluster. In the future we'll try to get smarter and avoiding propagating those + * messages to hosts without receives for a given channel. + * Otherwise: + * Publish this message across the slot (primary/replica). 
+ * -------------------------------------------------------------------------- */ +void clusterPropagatePublish(robj *channel, robj *message, int sharded) { + clusterMsgSendBlock *msgblock; + + if (!sharded) { + msgblock = clusterCreatePublishMsgBlock(channel, message, CLUSTERMSG_TYPE_PUBLISH); + clusterBroadcastMessage(msgblock); + clusterMsgSendBlockDecrRefCount(msgblock); + return; + } + + listIter li; + listNode *ln; + list *nodes_for_slot = clusterGetNodesInMyShard(server.cluster->myself); + serverAssert(nodes_for_slot != NULL); + listRewind(nodes_for_slot, &li); + msgblock = clusterCreatePublishMsgBlock(channel, message, CLUSTERMSG_TYPE_PUBLISHSHARD); + while((ln = listNext(&li))) { + clusterNode *node = listNodeValue(ln); + if (node->flags & (CLUSTER_NODE_MYSELF|CLUSTER_NODE_HANDSHAKE)) + continue; + clusterSendMessage(node->link,msgblock); + } + clusterMsgSendBlockDecrRefCount(msgblock); +} + +/* ----------------------------------------------------------------------------- + * SLAVE node specific functions + * -------------------------------------------------------------------------- */ + +/* This function sends a FAILOVER_AUTH_REQUEST message to every node in order to + * see if there is the quorum for this slave instance to failover its failing + * master. + * + * Note that we send the failover request to everybody, master and slave nodes, + * but only the masters are supposed to reply to our query. */ +void clusterRequestFailoverAuth(void) { + uint32_t msglen = sizeof(clusterMsg)-sizeof(union clusterMsgData); + clusterMsgSendBlock *msgblock = createClusterMsgSendBlock(CLUSTERMSG_TYPE_FAILOVER_AUTH_REQUEST, msglen); + + clusterMsg *hdr = &msgblock->msg; + /* If this is a manual failover, set the CLUSTERMSG_FLAG0_FORCEACK bit + * in the header to communicate the nodes receiving the message that + * they should authorized the failover even if the master is working. 
*/ + if (server.cluster->mf_end) hdr->mflags[0] |= CLUSTERMSG_FLAG0_FORCEACK; + clusterBroadcastMessage(msgblock); + clusterMsgSendBlockDecrRefCount(msgblock); +} + +/* Send a FAILOVER_AUTH_ACK message to the specified node. */ +void clusterSendFailoverAuth(clusterNode *node) { + if (!node->link) return; + + uint32_t msglen = sizeof(clusterMsg)-sizeof(union clusterMsgData); + clusterMsgSendBlock *msgblock = createClusterMsgSendBlock(CLUSTERMSG_TYPE_FAILOVER_AUTH_ACK, msglen); + + clusterSendMessage(node->link,msgblock); + clusterMsgSendBlockDecrRefCount(msgblock); +} + +/* Send a MFSTART message to the specified node. */ +void clusterSendMFStart(clusterNode *node) { + if (!node->link) return; + + uint32_t msglen = sizeof(clusterMsg)-sizeof(union clusterMsgData); + clusterMsgSendBlock *msgblock = createClusterMsgSendBlock(CLUSTERMSG_TYPE_MFSTART, msglen); + + clusterSendMessage(node->link,msgblock); + clusterMsgSendBlockDecrRefCount(msgblock); +} + +/* Vote for the node asking for our vote if there are the conditions. */ +void clusterSendFailoverAuthIfNeeded(clusterNode *node, clusterMsg *request) { + clusterNode *master = node->slaveof; + uint64_t requestCurrentEpoch = ntohu64(request->currentEpoch); + uint64_t requestConfigEpoch = ntohu64(request->configEpoch); + unsigned char *claimed_slots = request->myslots; + int force_ack = request->mflags[0] & CLUSTERMSG_FLAG0_FORCEACK; + int j; + + /* IF we are not a master serving at least 1 slot, we don't have the + * right to vote, as the cluster size in Redis Cluster is the number + * of masters serving at least one slot, and quorum is the cluster + * size + 1 */ + if (nodeIsSlave(myself) || myself->numslots == 0) return; + + /* Request epoch must be >= our currentEpoch. + * Note that it is impossible for it to actually be greater since + * our currentEpoch was updated as a side effect of receiving this + * request, if the request epoch was greater. 
*/ + if (requestCurrentEpoch < server.cluster->currentEpoch) { + serverLog(LL_WARNING, + "Failover auth denied to %.40s (%s): reqEpoch (%llu) < curEpoch(%llu)", + node->name, node->human_nodename, + (unsigned long long) requestCurrentEpoch, + (unsigned long long) server.cluster->currentEpoch); + return; + } + + /* I already voted for this epoch? Return ASAP. */ + if (server.cluster->lastVoteEpoch == server.cluster->currentEpoch) { + serverLog(LL_WARNING, + "Failover auth denied to %.40s (%s): already voted for epoch %llu", + node->name, node->human_nodename, + (unsigned long long) server.cluster->currentEpoch); + return; + } + + /* Node must be a slave and its master down. + * The master can be non failing if the request is flagged + * with CLUSTERMSG_FLAG0_FORCEACK (manual failover). */ + if (clusterNodeIsMaster(node) || master == NULL || + (!nodeFailed(master) && !force_ack)) + { + if (clusterNodeIsMaster(node)) { + serverLog(LL_WARNING, + "Failover auth denied to %.40s (%s): it is a master node", + node->name, node->human_nodename); + } else if (master == NULL) { + serverLog(LL_WARNING, + "Failover auth denied to %.40s (%s): I don't know its master", + node->name, node->human_nodename); + } else if (!nodeFailed(master)) { + serverLog(LL_WARNING, + "Failover auth denied to %.40s (%s): its master is up", + node->name, node->human_nodename); + } + return; + } + + /* We did not voted for a slave about this master for two + * times the node timeout. This is not strictly needed for correctness + * of the algorithm but makes the base case more linear. 
*/ + if (mstime() - node->slaveof->voted_time < server.cluster_node_timeout * 2) + { + serverLog(LL_WARNING, + "Failover auth denied to %.40s %s: " + "can't vote about this master before %lld milliseconds", + node->name, node->human_nodename, + (long long) ((server.cluster_node_timeout*2)- + (mstime() - node->slaveof->voted_time))); + return; + } + + /* The slave requesting the vote must have a configEpoch for the claimed + * slots that is >= the one of the masters currently serving the same + * slots in the current configuration. */ + for (j = 0; j < CLUSTER_SLOTS; j++) { + if (bitmapTestBit(claimed_slots, j) == 0) continue; + if (isSlotUnclaimed(j) || + server.cluster->slots[j]->configEpoch <= requestConfigEpoch) + { + continue; + } + /* If we reached this point we found a slot that in our current slots + * is served by a master with a greater configEpoch than the one claimed + * by the slave requesting our vote. Refuse to vote for this slave. */ + serverLog(LL_WARNING, + "Failover auth denied to %.40s (%s): " + "slot %d epoch (%llu) > reqEpoch (%llu)", + node->name, node->human_nodename, j, + (unsigned long long) server.cluster->slots[j]->configEpoch, + (unsigned long long) requestConfigEpoch); + return; + } + + /* We can vote for this slave. */ + server.cluster->lastVoteEpoch = server.cluster->currentEpoch; + node->slaveof->voted_time = mstime(); + clusterDoBeforeSleep(CLUSTER_TODO_SAVE_CONFIG|CLUSTER_TODO_FSYNC_CONFIG); + clusterSendFailoverAuth(node); + serverLog(LL_NOTICE, "Failover auth granted to %.40s (%s) for epoch %llu", + node->name, node->human_nodename, (unsigned long long) server.cluster->currentEpoch); +} + +/* This function returns the "rank" of this instance, a slave, in the context + * of its master-slaves ring. The rank of the slave is given by the number of + * other slaves for the same master that have a better replication offset + * compared to the local one (better means, greater, so they claim more data). 
+ * + * A slave with rank 0 is the one with the greatest (most up to date) + * replication offset, and so forth. Note that because how the rank is computed + * multiple slaves may have the same rank, in case they have the same offset. + * + * The slave rank is used to add a delay to start an election in order to + * get voted and replace a failing master. Slaves with better replication + * offsets are more likely to win. */ +int clusterGetSlaveRank(void) { + long long myoffset; + int j, rank = 0; + clusterNode *master; + + serverAssert(nodeIsSlave(myself)); + master = myself->slaveof; + if (master == NULL) return 0; /* Never called by slaves without master. */ + + myoffset = replicationGetSlaveOffset(); + for (j = 0; j < master->numslaves; j++) + if (master->slaves[j] != myself && + !nodeCantFailover(master->slaves[j]) && + master->slaves[j]->repl_offset > myoffset) rank++; + return rank; +} + +/* This function is called by clusterHandleSlaveFailover() in order to + * let the slave log why it is not able to failover. Sometimes there are + * not the conditions, but since the failover function is called again and + * again, we can't log the same things continuously. + * + * This function works by logging only if a given set of conditions are + * true: + * + * 1) The reason for which the failover can't be initiated changed. + * The reasons also include a NONE reason we reset the state to + * when the slave finds that its master is fine (no FAIL flag). + * 2) Also, the log is emitted again if the master is still down and + * the reason for not failing over is still the same, but more than + * CLUSTER_CANT_FAILOVER_RELOG_PERIOD seconds elapsed. + * 3) Finally, the function only logs if the slave is down for more than + * five seconds + NODE_TIMEOUT. This way nothing is logged when a + * failover starts in a reasonable time. + * + * The function is called with the reason why the slave can't failover + * which is one of the integer macros CLUSTER_CANT_FAILOVER_*. 
+ * + * The function is guaranteed to be called only if 'myself' is a slave. */ +void clusterLogCantFailover(int reason) { + char *msg; + static time_t lastlog_time = 0; + mstime_t nolog_fail_time = server.cluster_node_timeout + 5000; + + /* Don't log if we have the same reason for some time. */ + if (reason == server.cluster->cant_failover_reason && + time(NULL)-lastlog_time < CLUSTER_CANT_FAILOVER_RELOG_PERIOD) + return; + + server.cluster->cant_failover_reason = reason; + + /* We also don't emit any log if the master failed no long ago, the + * goal of this function is to log slaves in a stalled condition for + * a long time. */ + if (myself->slaveof && + nodeFailed(myself->slaveof) && + (mstime() - myself->slaveof->fail_time) < nolog_fail_time) return; + + switch(reason) { + case CLUSTER_CANT_FAILOVER_DATA_AGE: + msg = "Disconnected from master for longer than allowed. " + "Please check the 'cluster-replica-validity-factor' configuration " + "option."; + break; + case CLUSTER_CANT_FAILOVER_WAITING_DELAY: + msg = "Waiting the delay before I can start a new failover."; + break; + case CLUSTER_CANT_FAILOVER_EXPIRED: + msg = "Failover attempt expired."; + break; + case CLUSTER_CANT_FAILOVER_WAITING_VOTES: + msg = "Waiting for votes, but majority still not reached."; + break; + default: + msg = "Unknown reason code."; + break; + } + lastlog_time = time(NULL); + serverLog(LL_NOTICE,"Currently unable to failover: %s", msg); + + int cur_vote = server.cluster->failover_auth_count; + int cur_quorum = (server.cluster->size / 2) + 1; + /* Emits a log when an election is in progress and waiting for votes or when the failover attempt expired. */ + if (reason == CLUSTER_CANT_FAILOVER_WAITING_VOTES || reason == CLUSTER_CANT_FAILOVER_EXPIRED) { + serverLog(LL_NOTICE, "Needed quorum: %d. 
Number of votes received so far: %d", cur_quorum, cur_vote); + } +} + +/* This function implements the final part of automatic and manual failovers, + * where the slave grabs its master's hash slots, and propagates the new + * configuration. + * + * Note that it's up to the caller to be sure that the node got a new + * configuration epoch already. */ +void clusterFailoverReplaceYourMaster(void) { + int j; + clusterNode *oldmaster = myself->slaveof; + + if (clusterNodeIsMaster(myself) || oldmaster == NULL) return; + + /* 1) Turn this node into a master. */ + clusterSetNodeAsMaster(myself); + replicationUnsetMaster(); + + /* 2) Claim all the slots assigned to our master. */ + for (j = 0; j < CLUSTER_SLOTS; j++) { + if (clusterNodeCoversSlot(oldmaster, j)) { + clusterDelSlot(j); + clusterAddSlot(myself,j); + } + } + + /* 3) Update state and save config. */ + clusterUpdateState(); + clusterSaveConfigOrDie(1); + + /* 4) Pong all the other nodes so that they can update the state + * accordingly and detect that we switched to master role. */ + clusterBroadcastPong(CLUSTER_BROADCAST_ALL); + + /* 5) If there was a manual failover in progress, clear the state. */ + resetManualFailover(); +} + +/* This function is called if we are a slave node and our master serving + * a non-zero amount of hash slots is in FAIL state. + * + * The goal of this function is: + * 1) To check if we are able to perform a failover, is our data updated? + * 2) Try to get elected by masters. + * 3) Perform the failover informing all the other nodes. 
+ */ +void clusterHandleSlaveFailover(void) { + mstime_t data_age; + mstime_t auth_age = mstime() - server.cluster->failover_auth_time; + int needed_quorum = (server.cluster->size / 2) + 1; + int manual_failover = server.cluster->mf_end != 0 && + server.cluster->mf_can_start; + mstime_t auth_timeout, auth_retry_time; + + server.cluster->todo_before_sleep &= ~CLUSTER_TODO_HANDLE_FAILOVER; + + /* Compute the failover timeout (the max time we have to send votes + * and wait for replies), and the failover retry time (the time to wait + * before trying to get voted again). + * + * Timeout is MAX(NODE_TIMEOUT*2,2000) milliseconds. + * Retry is two times the Timeout. + */ + auth_timeout = server.cluster_node_timeout*2; + if (auth_timeout < 2000) auth_timeout = 2000; + auth_retry_time = auth_timeout*2; + + /* Pre conditions to run the function, that must be met both in case + * of an automatic or manual failover: + * 1) We are a slave. + * 2) Our master is flagged as FAIL, or this is a manual failover. + * 3) We don't have the no failover configuration set, and this is + * not a manual failover. + * 4) It is serving slots. */ + if (clusterNodeIsMaster(myself) || + myself->slaveof == NULL || + (!nodeFailed(myself->slaveof) && !manual_failover) || + (server.cluster_slave_no_failover && !manual_failover) || + myself->slaveof->numslots == 0) + { + /* There are no reasons to failover, so we set the reason why we + * are returning without failing over to NONE. */ + server.cluster->cant_failover_reason = CLUSTER_CANT_FAILOVER_NONE; + return; + } + + /* Set data_age to the number of milliseconds we are disconnected from + * the master. 
*/ + if (server.repl_state == REPL_STATE_CONNECTED) { + data_age = (mstime_t)(server.unixtime - server.master->lastinteraction) + * 1000; + } else { + data_age = (mstime_t)(server.unixtime - server.repl_down_since) * 1000; + } + + /* Remove the node timeout from the data age as it is fine that we are + * disconnected from our master at least for the time it was down to be + * flagged as FAIL, that's the baseline. */ + if (data_age > server.cluster_node_timeout) + data_age -= server.cluster_node_timeout; + + /* Check if our data is recent enough according to the slave validity + * factor configured by the user. + * + * Check bypassed for manual failovers. */ + if (server.cluster_slave_validity_factor && + data_age > + (((mstime_t)server.repl_ping_slave_period * 1000) + + (server.cluster_node_timeout * server.cluster_slave_validity_factor))) + { + if (!manual_failover) { + clusterLogCantFailover(CLUSTER_CANT_FAILOVER_DATA_AGE); + return; + } + } + + /* If the previous failover attempt timeout and the retry time has + * elapsed, we can setup a new one. */ + if (auth_age > auth_retry_time) { + server.cluster->failover_auth_time = mstime() + + 500 + /* Fixed delay of 500 milliseconds, let FAIL msg propagate. */ + random() % 500; /* Random delay between 0 and 500 milliseconds. */ + server.cluster->failover_auth_count = 0; + server.cluster->failover_auth_sent = 0; + server.cluster->failover_auth_rank = clusterGetSlaveRank(); + /* We add another delay that is proportional to the slave rank. + * Specifically 1 second * rank. This way slaves that have a probably + * less updated replication offset, are penalized. */ + server.cluster->failover_auth_time += + server.cluster->failover_auth_rank * 1000; + /* However if this is a manual failover, no delay is needed. 
*/ + if (server.cluster->mf_end) { + server.cluster->failover_auth_time = mstime(); + server.cluster->failover_auth_rank = 0; + clusterDoBeforeSleep(CLUSTER_TODO_HANDLE_FAILOVER); + } + serverLog(LL_NOTICE, + "Start of election delayed for %lld milliseconds " + "(rank #%d, offset %lld).", + server.cluster->failover_auth_time - mstime(), + server.cluster->failover_auth_rank, + replicationGetSlaveOffset()); + /* Now that we have a scheduled election, broadcast our offset + * to all the other slaves so that they'll updated their offsets + * if our offset is better. */ + clusterBroadcastPong(CLUSTER_BROADCAST_LOCAL_SLAVES); + return; + } + + /* It is possible that we received more updated offsets from other + * slaves for the same master since we computed our election delay. + * Update the delay if our rank changed. + * + * Not performed if this is a manual failover. */ + if (server.cluster->failover_auth_sent == 0 && + server.cluster->mf_end == 0) + { + int newrank = clusterGetSlaveRank(); + if (newrank > server.cluster->failover_auth_rank) { + long long added_delay = + (newrank - server.cluster->failover_auth_rank) * 1000; + server.cluster->failover_auth_time += added_delay; + server.cluster->failover_auth_rank = newrank; + serverLog(LL_NOTICE, + "Replica rank updated to #%d, added %lld milliseconds of delay.", + newrank, added_delay); + } + } + + /* Return ASAP if we can't still start the election. */ + if (mstime() < server.cluster->failover_auth_time) { + clusterLogCantFailover(CLUSTER_CANT_FAILOVER_WAITING_DELAY); + return; + } + + /* Return ASAP if the election is too old to be valid. */ + if (auth_age > auth_timeout) { + clusterLogCantFailover(CLUSTER_CANT_FAILOVER_EXPIRED); + return; + } + + /* Ask for votes if needed. 
*/ + if (server.cluster->failover_auth_sent == 0) { + server.cluster->currentEpoch++; + server.cluster->failover_auth_epoch = server.cluster->currentEpoch; + serverLog(LL_NOTICE,"Starting a failover election for epoch %llu.", + (unsigned long long) server.cluster->currentEpoch); + clusterRequestFailoverAuth(); + server.cluster->failover_auth_sent = 1; + clusterDoBeforeSleep(CLUSTER_TODO_SAVE_CONFIG| + CLUSTER_TODO_UPDATE_STATE| + CLUSTER_TODO_FSYNC_CONFIG); + return; /* Wait for replies. */ + } + + /* Check if we reached the quorum. */ + if (server.cluster->failover_auth_count >= needed_quorum) { + /* We have the quorum, we can finally failover the master. */ + + serverLog(LL_NOTICE, + "Failover election won: I'm the new master."); + + /* Update my configEpoch to the epoch of the election. */ + if (myself->configEpoch < server.cluster->failover_auth_epoch) { + myself->configEpoch = server.cluster->failover_auth_epoch; + serverLog(LL_NOTICE, + "configEpoch set to %llu after successful failover", + (unsigned long long) myself->configEpoch); + } + + /* Take responsibility for the cluster slots. */ + clusterFailoverReplaceYourMaster(); + } else { + clusterLogCantFailover(CLUSTER_CANT_FAILOVER_WAITING_VOTES); + } +} + +/* ----------------------------------------------------------------------------- + * CLUSTER slave migration + * + * Slave migration is the process that allows a slave of a master that is + * already covered by at least another slave, to "migrate" to a master that + * is orphaned, that is, left with no working slaves. + * ------------------------------------------------------------------------- */ + +/* This function is responsible to decide if this replica should be migrated + * to a different (orphaned) master. It is called by the clusterCron() function + * only if: + * + * 1) We are a slave node. + * 2) It was detected that there is at least one orphaned master in + * the cluster. 
+ * 3) We are a slave of one of the masters with the greatest number of + * slaves. + * + * This checks are performed by the caller since it requires to iterate + * the nodes anyway, so we spend time into clusterHandleSlaveMigration() + * if definitely needed. + * + * The function is called with a pre-computed max_slaves, that is the max + * number of working (not in FAIL state) slaves for a single master. + * + * Additional conditions for migration are examined inside the function. + */ +void clusterHandleSlaveMigration(int max_slaves) { + int j, okslaves = 0; + clusterNode *mymaster = myself->slaveof, *target = NULL, *candidate = NULL; + dictIterator *di; + dictEntry *de; + + /* Step 1: Don't migrate if the cluster state is not ok. */ + if (server.cluster->state != CLUSTER_OK) return; + + /* Step 2: Don't migrate if my master will not be left with at least + * 'migration-barrier' slaves after my migration. */ + if (mymaster == NULL) return; + for (j = 0; j < mymaster->numslaves; j++) + if (!nodeFailed(mymaster->slaves[j]) && + !nodeTimedOut(mymaster->slaves[j])) okslaves++; + if (okslaves <= server.cluster_migration_barrier) return; + + /* Step 3: Identify a candidate for migration, and check if among the + * masters with the greatest number of ok slaves, I'm the one with the + * smallest node ID (the "candidate slave"). + * + * Note: this means that eventually a replica migration will occur + * since slaves that are reachable again always have their FAIL flag + * cleared, so eventually there must be a candidate. + * There is a possible race condition causing multiple + * slaves to migrate at the same time, but this is unlikely to + * happen and relatively harmless when it does. 
*/ + candidate = myself; + di = dictGetSafeIterator(server.cluster->nodes); + while((de = dictNext(di)) != NULL) { + clusterNode *node = dictGetVal(de); + int okslaves = 0, is_orphaned = 1; + + /* We want to migrate only if this master is working, orphaned, and + * used to have slaves or if failed over a master that had slaves + * (MIGRATE_TO flag). This way we only migrate to instances that were + * supposed to have replicas. */ + if (nodeIsSlave(node) || nodeFailed(node)) is_orphaned = 0; + if (!(node->flags & CLUSTER_NODE_MIGRATE_TO)) is_orphaned = 0; + + /* Check number of working slaves. */ + if (clusterNodeIsMaster(node)) okslaves = clusterCountNonFailingSlaves(node); + if (okslaves > 0) is_orphaned = 0; + + if (is_orphaned) { + if (!target && node->numslots > 0) target = node; + + /* Track the starting time of the orphaned condition for this + * master. */ + if (!node->orphaned_time) node->orphaned_time = mstime(); + } else { + node->orphaned_time = 0; + } + + /* Check if I'm the slave candidate for the migration: attached + * to a master with the maximum number of slaves and with the smallest + * node ID. */ + if (okslaves == max_slaves) { + for (j = 0; j < node->numslaves; j++) { + if (memcmp(node->slaves[j]->name, + candidate->name, + CLUSTER_NAMELEN) < 0) + { + candidate = node->slaves[j]; + } + } + } + } + dictReleaseIterator(di); + + /* Step 4: perform the migration if there is a target, and if I'm the + * candidate, but only if the master is continuously orphaned for a + * couple of seconds, so that during failovers, we give some time to + * the natural slaves of this instance to advertise their switch from + * the old master to the new one. 
*/ + if (target && candidate == myself && + (mstime()-target->orphaned_time) > CLUSTER_SLAVE_MIGRATION_DELAY && + !(server.cluster_module_flags & CLUSTER_MODULE_FLAG_NO_FAILOVER)) + { + serverLog(LL_NOTICE,"Migrating to orphaned master %.40s", + target->name); + clusterSetMaster(target); + } +} + +/* ----------------------------------------------------------------------------- + * CLUSTER manual failover + * + * This are the important steps performed by slaves during a manual failover: + * 1) User send CLUSTER FAILOVER command. The failover state is initialized + * setting mf_end to the millisecond unix time at which we'll abort the + * attempt. + * 2) Slave sends a MFSTART message to the master requesting to pause clients + * for two times the manual failover timeout CLUSTER_MF_TIMEOUT. + * When master is paused for manual failover, it also starts to flag + * packets with CLUSTERMSG_FLAG0_PAUSED. + * 3) Slave waits for master to send its replication offset flagged as PAUSED. + * 4) If slave received the offset from the master, and its offset matches, + * mf_can_start is set to 1, and clusterHandleSlaveFailover() will perform + * the failover as usually, with the difference that the vote request + * will be modified to force masters to vote for a slave that has a + * working master. + * + * From the point of view of the master things are simpler: when a + * PAUSE_CLIENTS packet is received the master sets mf_end as well and + * the sender in mf_slave. During the time limit for the manual failover + * the master will just send PINGs more often to this slave, flagged with + * the PAUSED flag, so that the slave will set mf_master_offset when receiving + * a packet from the master with this flag set. + * + * The goal of the manual failover is to perform a fast failover without + * data loss due to the asynchronous master-slave replication. + * -------------------------------------------------------------------------- */ + +/* Reset the manual failover state. 
This works for both masters and slaves + * as all the state about manual failover is cleared. + * + * The function can be used both to initialize the manual failover state at + * startup or to abort a manual failover in progress. */ +void resetManualFailover(void) { + if (server.cluster->mf_slave) { + /* We were a master failing over, so we paused clients and related actions. + * Regardless of the outcome we unpause now to allow traffic again. */ + unpauseActions(PAUSE_DURING_FAILOVER); + } + server.cluster->mf_end = 0; /* No manual failover in progress. */ + server.cluster->mf_can_start = 0; + server.cluster->mf_slave = NULL; + server.cluster->mf_master_offset = -1; +} + +/* If a manual failover timed out, abort it. */ +void manualFailoverCheckTimeout(void) { + if (server.cluster->mf_end && server.cluster->mf_end < mstime()) { + serverLog(LL_WARNING,"Manual failover timed out."); + resetManualFailover(); + } +} + +/* This function is called from the cluster cron function in order to go + * forward with a manual failover state machine. */ +void clusterHandleManualFailover(void) { + /* Return ASAP if no manual failover is in progress. */ + if (server.cluster->mf_end == 0) return; + + /* If mf_can_start is non-zero, the failover was already triggered so the + * next steps are performed by clusterHandleSlaveFailover(). */ + if (server.cluster->mf_can_start) return; + + if (server.cluster->mf_master_offset == -1) return; /* Wait for offset... */ + + if (server.cluster->mf_master_offset == replicationGetSlaveOffset()) { + /* Our replication offset matches the master replication offset + * announced after clients were paused. We can start the failover. 
*/ + server.cluster->mf_can_start = 1; + serverLog(LL_NOTICE, + "All master replication stream processed, " + "manual failover can start."); + clusterDoBeforeSleep(CLUSTER_TODO_HANDLE_FAILOVER); + return; + } + clusterDoBeforeSleep(CLUSTER_TODO_HANDLE_MANUALFAILOVER); +} + +/* ----------------------------------------------------------------------------- + * CLUSTER cron job + * -------------------------------------------------------------------------- */ + +/* Check if the node is disconnected and re-establish the connection. + * Also update a few stats while we are here, that can be used to make + * better decisions in other part of the code. */ +static int clusterNodeCronHandleReconnect(clusterNode *node, mstime_t handshake_timeout, mstime_t now) { + /* Not interested in reconnecting the link with myself or nodes + * for which we have no address. */ + if (node->flags & (CLUSTER_NODE_MYSELF|CLUSTER_NODE_NOADDR)) return 1; + + if (node->flags & CLUSTER_NODE_PFAIL) + server.cluster->stats_pfail_nodes++; + + /* A Node in HANDSHAKE state has a limited lifespan equal to the + * configured node timeout. */ + if (nodeInHandshake(node) && now - node->ctime > handshake_timeout) { + clusterDelNode(node); + return 1; + } + + if (node->link == NULL) { + clusterLink *link = createClusterLink(node); + link->conn = connCreate(connTypeOfCluster()); + connSetPrivateData(link->conn, link); + if (connConnect(link->conn, node->ip, node->cport, server.bind_source_addr, + clusterLinkConnectHandler) == C_ERR) { + /* We got a synchronous error from connect before + * clusterSendPing() had a chance to be called. + * If node->ping_sent is zero, failure detection can't work, + * so we claim we actually sent a ping now (that will + * be really sent as soon as the link is obtained). 
*/ + if (node->ping_sent == 0) node->ping_sent = mstime(); + serverLog(LL_DEBUG, "Unable to connect to " + "Cluster Node [%s]:%d -> %s", node->ip, + node->cport, server.neterr); + + freeClusterLink(link); + return 0; + } + } + return 0; +} + +static void freeClusterLinkOnBufferLimitReached(clusterLink *link) { + if (link == NULL || server.cluster_link_msg_queue_limit_bytes == 0) { + return; + } + + unsigned long long mem_link = link->send_msg_queue_mem; + if (mem_link > server.cluster_link_msg_queue_limit_bytes) { + serverLog(LL_WARNING, "Freeing cluster link(%s node %.40s, used memory: %llu) due to " + "exceeding send buffer memory limit.", link->inbound ? "from" : "to", + link->node ? link->node->name : "", mem_link); + freeClusterLink(link); + server.cluster->stat_cluster_links_buffer_limit_exceeded++; + } +} + +/* Free outbound link to a node if its send buffer size exceeded limit. */ +static void clusterNodeCronFreeLinkOnBufferLimitReached(clusterNode *node) { + freeClusterLinkOnBufferLimitReached(node->link); + freeClusterLinkOnBufferLimitReached(node->inbound_link); +} + +/* This is executed 10 times every second */ +void clusterCron(void) { + dictIterator *di; + dictEntry *de; + int update_state = 0; + int orphaned_masters; /* How many masters there are without ok slaves. */ + int max_slaves; /* Max number of ok slaves for a single master. */ + int this_slaves; /* Number of ok slaves for our master (if we are slave). */ + mstime_t min_pong = 0, now = mstime(); + clusterNode *min_pong_node = NULL; + static unsigned long long iteration = 0; + mstime_t handshake_timeout; + + iteration++; /* Number of times this function was called so far. */ + + clusterUpdateMyselfHostname(); + + /* The handshake timeout is the time after which a handshake node that was + * not turned into a normal node is removed from the nodes. Usually it is + * just the NODE_TIMEOUT value, but when NODE_TIMEOUT is too small we use + * the value of 1 second. 
*/ + handshake_timeout = server.cluster_node_timeout; + if (handshake_timeout < 1000) handshake_timeout = 1000; + + /* Clear so clusterNodeCronHandleReconnect can count the number of nodes in PFAIL. */ + server.cluster->stats_pfail_nodes = 0; + /* Run through some of the operations we want to do on each cluster node. */ + di = dictGetSafeIterator(server.cluster->nodes); + while((de = dictNext(di)) != NULL) { + clusterNode *node = dictGetVal(de); + /* We free the inbound or outboud link to the node if the link has an + * oversized message send queue and immediately try reconnecting. */ + clusterNodeCronFreeLinkOnBufferLimitReached(node); + /* The protocol is that function(s) below return non-zero if the node was + * terminated. + */ + if(clusterNodeCronHandleReconnect(node, handshake_timeout, now)) continue; + } + dictReleaseIterator(di); + + /* Ping some random node 1 time every 10 iterations, so that we usually ping + * one random node every second. */ + if (!(iteration % 10)) { + int j; + + /* Check a few random nodes and ping the one with the oldest + * pong_received time. */ + for (j = 0; j < 5; j++) { + de = dictGetRandomKey(server.cluster->nodes); + clusterNode *this = dictGetVal(de); + + /* Don't ping nodes disconnected or with a ping currently active. */ + if (this->link == NULL || this->ping_sent != 0) continue; + if (this->flags & (CLUSTER_NODE_MYSELF|CLUSTER_NODE_HANDSHAKE)) + continue; + if (min_pong_node == NULL || min_pong > this->pong_received) { + min_pong_node = this; + min_pong = this->pong_received; + } + } + if (min_pong_node) { + serverLog(LL_DEBUG,"Pinging node %.40s", min_pong_node->name); + clusterSendPing(min_pong_node->link, CLUSTERMSG_TYPE_PING); + } + } + + /* Iterate nodes to check if we need to flag something as failing. + * This loop is also responsible to: + * 1) Check if there are orphaned masters (masters without non failing + * slaves). + * 2) Count the max number of non failing slaves for a single master. 
+ * 3) Count the number of slaves for our master, if we are a slave. */ + orphaned_masters = 0; + max_slaves = 0; + this_slaves = 0; + di = dictGetSafeIterator(server.cluster->nodes); + while((de = dictNext(di)) != NULL) { + clusterNode *node = dictGetVal(de); + now = mstime(); /* Use an updated time at every iteration. */ + + if (node->flags & + (CLUSTER_NODE_MYSELF|CLUSTER_NODE_NOADDR|CLUSTER_NODE_HANDSHAKE)) + continue; + + /* Orphaned master check, useful only if the current instance + * is a slave that may migrate to another master. */ + if (nodeIsSlave(myself) && clusterNodeIsMaster(node) && !nodeFailed(node)) { + int okslaves = clusterCountNonFailingSlaves(node); + + /* A master is orphaned if it is serving a non-zero number of + * slots, have no working slaves, but used to have at least one + * slave, or failed over a master that used to have slaves. */ + if (okslaves == 0 && node->numslots > 0 && + node->flags & CLUSTER_NODE_MIGRATE_TO) + { + orphaned_masters++; + } + if (okslaves > max_slaves) max_slaves = okslaves; + if (myself->slaveof == node) + this_slaves = okslaves; + } + + /* If we are not receiving any data for more than half the cluster + * timeout, reconnect the link: maybe there is a connection + * issue even if the node is alive. */ + mstime_t ping_delay = now - node->ping_sent; + mstime_t data_delay = now - node->data_received; + if (node->link && /* is connected */ + now - node->link->ctime > + server.cluster_node_timeout && /* was not already reconnected */ + node->ping_sent && /* we already sent a ping */ + /* and we are waiting for the pong more than timeout/2 */ + ping_delay > server.cluster_node_timeout/2 && + /* and in such interval we are not seeing any traffic at all. */ + data_delay > server.cluster_node_timeout/2) + { + /* Disconnect the link, it will be reconnected automatically. 
*/ + freeClusterLink(node->link); + } + + /* If we have currently no active ping in this instance, and the + * received PONG is older than half the cluster timeout, send + * a new ping now, to ensure all the nodes are pinged without + * a too big delay. */ + mstime_t ping_interval = server.cluster_ping_interval ? + server.cluster_ping_interval : server.cluster_node_timeout/2; + if (node->link && + node->ping_sent == 0 && + (now - node->pong_received) > ping_interval) + { + clusterSendPing(node->link, CLUSTERMSG_TYPE_PING); + continue; + } + + /* If we are a master and one of the slaves requested a manual + * failover, ping it continuously. */ + if (server.cluster->mf_end && + clusterNodeIsMaster(myself) && + server.cluster->mf_slave == node && + node->link) + { + clusterSendPing(node->link, CLUSTERMSG_TYPE_PING); + continue; + } + + /* Check only if we have an active ping for this instance. */ + if (node->ping_sent == 0) continue; + + /* Check if this node looks unreachable. + * Note that if we already received the PONG, then node->ping_sent + * is zero, so can't reach this code at all, so we don't risk of + * checking for a PONG delay if we didn't sent the PING. + * + * We also consider every incoming data as proof of liveness, since + * our cluster bus link is also used for data: under heavy data + * load pong delays are possible. */ + mstime_t node_delay = (ping_delay < data_delay) ? ping_delay : + data_delay; + + if (node_delay > server.cluster_node_timeout) { + /* Timeout reached. Set the node as possibly failing if it is + * not already in this state. 
*/ + if (!(node->flags & (CLUSTER_NODE_PFAIL|CLUSTER_NODE_FAIL))) { + node->flags |= CLUSTER_NODE_PFAIL; + update_state = 1; + if (clusterNodeIsMaster(myself) && server.cluster->size == 1) { + markNodeAsFailingIfNeeded(node); + } else { + serverLog(LL_DEBUG,"*** NODE %.40s possibly failing", node->name); + } + } + } + } + dictReleaseIterator(di); + + /* If we are a slave node but the replication is still turned off, + * enable it if we know the address of our master and it appears to + * be up. */ + if (nodeIsSlave(myself) && + server.masterhost == NULL && + myself->slaveof && + nodeHasAddr(myself->slaveof)) + { + replicationSetMaster(myself->slaveof->ip, getNodeDefaultReplicationPort(myself->slaveof)); + } + + /* Abort a manual failover if the timeout is reached. */ + manualFailoverCheckTimeout(); + + if (nodeIsSlave(myself)) { + clusterHandleManualFailover(); + if (!(server.cluster_module_flags & CLUSTER_MODULE_FLAG_NO_FAILOVER)) + clusterHandleSlaveFailover(); + /* If there are orphaned slaves, and we are a slave among the masters + * with the max number of non-failing slaves, consider migrating to + * the orphaned masters. Note that it does not make sense to try + * a migration if there is no master with at least *two* working + * slaves. */ + if (orphaned_masters && max_slaves >= 2 && this_slaves == max_slaves && + server.cluster_allow_replica_migration) + clusterHandleSlaveMigration(max_slaves); + } + + if (update_state || server.cluster->state == CLUSTER_FAIL) + clusterUpdateState(); +} + +/* This function is called before the event handler returns to sleep for + * events. It is useful to perform operations that must be done ASAP in + * reaction to events fired but that are not safe to perform inside event + * handlers, or to perform potentially expansive tasks that we need to do + * a single time before replying to clients. 
*/ +void clusterBeforeSleep(void) { + int flags = server.cluster->todo_before_sleep; + + /* Reset our flags (not strictly needed since every single function + * called for flags set should be able to clear its flag). */ + server.cluster->todo_before_sleep = 0; + + if (flags & CLUSTER_TODO_HANDLE_MANUALFAILOVER) { + /* Handle manual failover as soon as possible so that won't have a 100ms + * as it was handled only in clusterCron */ + if(nodeIsSlave(myself)) { + clusterHandleManualFailover(); + if (!(server.cluster_module_flags & CLUSTER_MODULE_FLAG_NO_FAILOVER)) + clusterHandleSlaveFailover(); + } + } else if (flags & CLUSTER_TODO_HANDLE_FAILOVER) { + /* Handle failover, this is needed when it is likely that there is already + * the quorum from masters in order to react fast. */ + clusterHandleSlaveFailover(); + } + + /* Update the cluster state. */ + if (flags & CLUSTER_TODO_UPDATE_STATE) + clusterUpdateState(); + + /* Save the config, possibly using fsync. */ + if (flags & CLUSTER_TODO_SAVE_CONFIG) { + int fsync = flags & CLUSTER_TODO_FSYNC_CONFIG; + clusterSaveConfigOrDie(fsync); + } +} + +void clusterDoBeforeSleep(int flags) { + server.cluster->todo_before_sleep |= flags; +} + +/* ----------------------------------------------------------------------------- + * Slots management + * -------------------------------------------------------------------------- */ + +/* Test bit 'pos' in a generic bitmap. Return 1 if the bit is set, + * otherwise 0. */ +int bitmapTestBit(unsigned char *bitmap, int pos) { + off_t byte = pos/8; + int bit = pos&7; + return (bitmap[byte] & (1<nodes); + dictEntry *de; + int slaves = 0; + while((de = dictNext(di)) != NULL) { + clusterNode *node = dictGetVal(de); + + if (nodeIsSlave(node)) continue; + slaves += node->numslaves; + } + dictReleaseIterator(di); + return slaves != 0; +} + +/* Set the slot bit and return the old value. 
*/ +int clusterNodeSetSlotBit(clusterNode *n, int slot) { + int old = bitmapTestBit(n->slots,slot); + if (!old) { + bitmapSetBit(n->slots,slot); + n->numslots++; + /* When a master gets its first slot, even if it has no slaves, + * it gets flagged with MIGRATE_TO, that is, the master is a valid + * target for replicas migration, if and only if at least one of + * the other masters has slaves right now. + * + * Normally masters are valid targets of replica migration if: + * 1. The used to have slaves (but no longer have). + * 2. They are slaves failing over a master that used to have slaves. + * + * However new masters with slots assigned are considered valid + * migration targets if the rest of the cluster is not a slave-less. + * + * See https://github.com/redis/redis/issues/3043 for more info. */ + if (n->numslots == 1 && clusterMastersHaveSlaves()) + n->flags |= CLUSTER_NODE_MIGRATE_TO; + } + return old; +} + +/* Clear the slot bit and return the old value. */ +int clusterNodeClearSlotBit(clusterNode *n, int slot) { + int old = bitmapTestBit(n->slots,slot); + if (old) { + bitmapClearBit(n->slots,slot); + n->numslots--; + } + return old; +} + +/* Return the slot bit from the cluster node structure. */ +int clusterNodeCoversSlot(clusterNode *n, int slot) { + return bitmapTestBit(n->slots,slot); +} + +/* Add the specified slot to the list of slots that node 'n' will + * serve. Return C_OK if the operation ended with success. + * If the slot is already assigned to another instance this is considered + * an error and C_ERR is returned. */ +int clusterAddSlot(clusterNode *n, int slot) { + if (server.cluster->slots[slot]) return C_ERR; + clusterNodeSetSlotBit(n,slot); + server.cluster->slots[slot] = n; + return C_OK; +} + +/* Delete the specified slot marking it as unassigned. + * Returns C_OK if the slot was assigned, otherwise if the slot was + * already unassigned C_ERR is returned. 
*/ +int clusterDelSlot(int slot) { + clusterNode *n = server.cluster->slots[slot]; + + if (!n) return C_ERR; + + /* Cleanup the channels in master/replica as part of slot deletion. */ + removeChannelsInSlot(slot); + /* Clear the slot bit. */ + serverAssert(clusterNodeClearSlotBit(n,slot) == 1); + server.cluster->slots[slot] = NULL; + /* Make owner_not_claiming_slot flag consistent with slot ownership information. */ + bitmapClearBit(server.cluster->owner_not_claiming_slot, slot); + return C_OK; +} + +/* Transfer slots from `from_node` to `to_node`. + * Iterates over all cluster slots, transferring each slot covered by `from_node` to `to_node`. + * Counts and returns the number of slots transferred. */ +int clusterMoveNodeSlots(clusterNode *from_node, clusterNode *to_node) { + int processed = 0; + + for (int j = 0; j < CLUSTER_SLOTS; j++) { + if (clusterNodeCoversSlot(from_node, j)) { + clusterDelSlot(j); + clusterAddSlot(to_node, j); + processed++; + } + } + return processed; +} + +/* Delete all the slots associated with the specified node. + * The number of deleted slots is returned. */ +int clusterDelNodeSlots(clusterNode *node) { + int deleted = 0, j; + + for (j = 0; j < CLUSTER_SLOTS; j++) { + if (clusterNodeCoversSlot(node, j)) { + clusterDelSlot(j); + deleted++; + } + } + return deleted; +} + +/* Clear the migrating / importing state for all the slots. + * This is useful at initialization and when turning a master into slave. 
*/ +void clusterCloseAllSlots(void) { + memset(server.cluster->migrating_slots_to,0, + sizeof(server.cluster->migrating_slots_to)); + memset(server.cluster->importing_slots_from,0, + sizeof(server.cluster->importing_slots_from)); +} + +/* ----------------------------------------------------------------------------- + * Cluster state evaluation function + * -------------------------------------------------------------------------- */ + +/* The following are defines that are only used in the evaluation function + * and are based on heuristics. Actually the main point about the rejoin and + * writable delay is that they should be a few orders of magnitude larger + * than the network latency. */ +#define CLUSTER_MAX_REJOIN_DELAY 5000 +#define CLUSTER_MIN_REJOIN_DELAY 500 +#define CLUSTER_WRITABLE_DELAY 2000 + +void clusterUpdateState(void) { + int j, new_state; + int reachable_masters = 0; + static mstime_t among_minority_time; + static mstime_t first_call_time = 0; + + server.cluster->todo_before_sleep &= ~CLUSTER_TODO_UPDATE_STATE; + + /* If this is a master node, wait some time before turning the state + * into OK, since it is not a good idea to rejoin the cluster as a writable + * master, after a reboot, without giving the cluster a chance to + * reconfigure this node. Note that the delay is calculated starting from + * the first call to this function and not since the server start, in order + * to not count the DB loading time. */ + if (first_call_time == 0) first_call_time = mstime(); + if (clusterNodeIsMaster(myself) && + server.cluster->state == CLUSTER_FAIL && + mstime() - first_call_time < CLUSTER_WRITABLE_DELAY) return; + + /* Start assuming the state is OK. We'll turn it into FAIL if there + * are the right conditions. */ + new_state = CLUSTER_OK; + + /* Check if all the slots are covered.
 * NOTE(review): kept verbatim — the quorum math and rejoin-delay clamping below
 * gate when a rejoining master accepts writes; statement order is significant.
*/ + if (server.cluster_require_full_coverage) { + for (j = 0; j < CLUSTER_SLOTS; j++) { + if (server.cluster->slots[j] == NULL || + server.cluster->slots[j]->flags & (CLUSTER_NODE_FAIL)) + { + new_state = CLUSTER_FAIL; + break; + } + } + } + + /* Compute the cluster size, that is the number of master nodes + * serving at least a single slot. + * + * At the same time count the number of reachable masters having + * at least one slot. */ + { + dictIterator *di; + dictEntry *de; + + server.cluster->size = 0; + di = dictGetSafeIterator(server.cluster->nodes); + while((de = dictNext(di)) != NULL) { + clusterNode *node = dictGetVal(de); + + if (clusterNodeIsMaster(node) && node->numslots) { + server.cluster->size++; + if ((node->flags & (CLUSTER_NODE_FAIL|CLUSTER_NODE_PFAIL)) == 0) + reachable_masters++; + } + } + dictReleaseIterator(di); + } + + /* If we are in a minority partition, change the cluster state + * to FAIL. */ + { + int needed_quorum = (server.cluster->size / 2) + 1; + + if (reachable_masters < needed_quorum) { + new_state = CLUSTER_FAIL; + among_minority_time = mstime(); + } + } + + /* Log a state change */ + if (new_state != server.cluster->state) { + mstime_t rejoin_delay = server.cluster_node_timeout; + + /* If the instance is a master and was partitioned away with the + * minority, don't let it accept queries for some time after the + * partition heals, to make sure there is enough time to receive + * a configuration update. */ + if (rejoin_delay > CLUSTER_MAX_REJOIN_DELAY) + rejoin_delay = CLUSTER_MAX_REJOIN_DELAY; + if (rejoin_delay < CLUSTER_MIN_REJOIN_DELAY) + rejoin_delay = CLUSTER_MIN_REJOIN_DELAY; + + if (new_state == CLUSTER_OK && + clusterNodeIsMaster(myself) && + mstime() - among_minority_time < rejoin_delay) + { + return; + } + + /* Change the state and log the event. */ + serverLog(new_state == CLUSTER_OK ? LL_NOTICE : LL_WARNING, + "Cluster state changed: %s", + new_state == CLUSTER_OK ?
"ok" : "fail"); + server.cluster->state = new_state; + } +} + +/* This function is called after the node startup in order to verify that data + * loaded from disk is in agreement with the cluster configuration: + * + * 1) If we find keys about hash slots we have no responsibility for, the + * following happens: + * A) If no other node is in charge according to the current cluster + * configuration, we add these slots to our node. + * B) If according to our config other nodes are already in charge for + * this slots, we set the slots as IMPORTING from our point of view + * in order to justify we have those slots, and in order to make + * redis-cli aware of the issue, so that it can try to fix it. + * 2) If we find data in a DB different than DB0 we return C_ERR to + * signal the caller it should quit the server with an error message + * or take other actions. + * + * The function always returns C_OK even if it will try to correct + * the error described in "1". However if data is found in DB different + * from DB0, C_ERR is returned. + * + * The function also uses the logging facility in order to warn the user + * about desynchronizations between the data we have in memory and the + * cluster configuration. */ +int verifyClusterConfigWithData(void) { + int j; + int update_config = 0; + + /* Return ASAP if a module disabled cluster redirections. In that case + * every master can store keys about every possible hash slot. */ + if (server.cluster_module_flags & CLUSTER_MODULE_FLAG_NO_REDIRECTION) + return C_OK; + + /* If this node is a slave, don't perform the check at all as we + * completely depend on the replication stream. */ + if (nodeIsSlave(myself)) return C_OK; + + /* Make sure we only have keys in DB0. */ + for (j = 1; j < server.dbnum; j++) { + if (kvstoreSize(server.db[j].keys)) return C_ERR; + } + + /* Check that all the slots we see populated memory have a corresponding + * entry in the cluster table. Otherwise fix the table.
*/ + for (j = 0; j < CLUSTER_SLOTS; j++) { + if (!countKeysInSlot(j)) continue; /* No keys in this slot. */ + /* Check if we are assigned to this slot or if we are importing it. + * In both cases check the next slot as the configuration makes + * sense. */ + if (server.cluster->slots[j] == myself || + server.cluster->importing_slots_from[j] != NULL) continue; + + /* If we are here data and cluster config don't agree, and we have + * slot 'j' populated even if we are not importing it, nor we are + * assigned to this slot. Fix this condition. */ + + update_config++; + /* Case A: slot is unassigned. Take responsibility for it. */ + if (server.cluster->slots[j] == NULL) { + serverLog(LL_NOTICE, "I have keys for unassigned slot %d. " + "Taking responsibility for it.",j); + clusterAddSlot(myself,j); + } else { + serverLog(LL_NOTICE, "I have keys for slot %d, but the slot is " + "assigned to another node. " + "Setting it to importing state.",j); + server.cluster->importing_slots_from[j] = server.cluster->slots[j]; + } + } + if (update_config) clusterSaveConfigOrDie(1); + return C_OK; +} + +/* Remove all the shard channel related information not owned by the current shard. */ +static inline void removeAllNotOwnedShardChannelSubscriptions(void) { + if (!kvstoreSize(server.pubsubshard_channels)) return; + clusterNode *currmaster = clusterNodeIsMaster(myself) ? myself : myself->slaveof; + for (int j = 0; j < CLUSTER_SLOTS; j++) { + if (server.cluster->slots[j] != currmaster) { + removeChannelsInSlot(j); + } + } +} + +/* ----------------------------------------------------------------------------- + * SLAVE nodes handling + * -------------------------------------------------------------------------- */ + +/* Set the specified node 'n' as master for this node. + * If this node is currently a master, it is turned into a slave.
*/ +void clusterSetMaster(clusterNode *n) { + serverAssert(n != myself); + serverAssert(myself->numslots == 0); + + if (clusterNodeIsMaster(myself)) { + myself->flags &= ~(CLUSTER_NODE_MASTER|CLUSTER_NODE_MIGRATE_TO); + myself->flags |= CLUSTER_NODE_SLAVE; + clusterCloseAllSlots(); + } else { + if (myself->slaveof) + clusterNodeRemoveSlave(myself->slaveof,myself); + } + myself->slaveof = n; + updateShardId(myself, n->shard_id); + clusterNodeAddSlave(n,myself); + replicationSetMaster(n->ip, getNodeDefaultReplicationPort(n)); + removeAllNotOwnedShardChannelSubscriptions(); + resetManualFailover(); +} + +/* ----------------------------------------------------------------------------- + * Nodes to string representation functions. + * -------------------------------------------------------------------------- */ + +struct redisNodeFlags { + uint16_t flag; + char *name; +}; + +static struct redisNodeFlags redisNodeFlagsTable[] = { + {CLUSTER_NODE_MYSELF, "myself,"}, + {CLUSTER_NODE_MASTER, "master,"}, + {CLUSTER_NODE_SLAVE, "slave,"}, + {CLUSTER_NODE_PFAIL, "fail?,"}, + {CLUSTER_NODE_FAIL, "fail,"}, + {CLUSTER_NODE_HANDSHAKE, "handshake,"}, + {CLUSTER_NODE_NOADDR, "noaddr,"}, + {CLUSTER_NODE_NOFAILOVER, "nofailover,"} +}; + +/* Concatenate the comma separated list of node flags to the given SDS + * string 'ci'. */ +sds representClusterNodeFlags(sds ci, uint16_t flags) { + size_t orig_len = sdslen(ci); + int i, size = sizeof(redisNodeFlagsTable)/sizeof(struct redisNodeFlags); + for (i = 0; i < size; i++) { + struct redisNodeFlags *nodeflag = redisNodeFlagsTable + i; + if (flags & nodeflag->flag) ci = sdscat(ci, nodeflag->name); + } + /* If no flag was added, add the "noflags" special flag. */ + if (sdslen(ci) == orig_len) ci = sdscat(ci,"noflags,"); + sdsIncrLen(ci,-1); /* Remove trailing comma. */ + return ci; +} + +/* Concatenate the slot ownership information to the given SDS string 'ci'.
 * NOTE(review): representSlotInfo() below passes 'unsigned long' values to
 * sdscatfmt "%i" (which reads an int) — presumably safe since slot numbers fit
 * in 0..16383, but verify against the sds format-specifier contract.
 * If the slot ownership is in a contiguous block, it's represented as start-end pair, + * else each slot is added separately. */ +sds representSlotInfo(sds ci, uint16_t *slot_info_pairs, int slot_info_pairs_count) { + for (int i = 0; i< slot_info_pairs_count; i+=2) { + unsigned long start = slot_info_pairs[i]; + unsigned long end = slot_info_pairs[i+1]; + if (start == end) { + ci = sdscatfmt(ci, " %i", start); + } else { + ci = sdscatfmt(ci, " %i-%i", start, end); + } + } + return ci; +} + +/* Generate a csv-alike representation of the specified cluster node. + * See clusterGenNodesDescription() top comment for more information. + * + * The function returns the string representation as an SDS string. */ +sds clusterGenNodeDescription(client *c, clusterNode *node, int tls_primary) { + int j, start; + sds ci; + int port = clusterNodeClientPort(node, tls_primary); + + /* Node coordinates */ + ci = sdscatlen(sdsempty(),node->name,CLUSTER_NAMELEN); + ci = sdscatfmt(ci," %s:%i@%i", + node->ip, + port, + node->cport); + if (sdslen(node->hostname) != 0) { + ci = sdscatfmt(ci,",%s", node->hostname); + } + /* Don't expose aux fields to any clients yet but do allow them + * to be persisted to nodes.conf */ + if (c == NULL) { + if (sdslen(node->hostname) == 0) { + ci = sdscatfmt(ci,",", 1); + } + for (int i = af_count-1; i >=0; i--) { + if ((tls_primary && i == af_tls_port) || (!tls_primary && i == af_tcp_port)) { + continue; + } + if (auxFieldHandlers[i].isPresent(node)) { + ci = sdscatprintf(ci, ",%s=", auxFieldHandlers[i].field); + ci = auxFieldHandlers[i].getter(node, ci); + } + } + } + + /* Flags */ + ci = sdscatlen(ci," ",1); + ci = representClusterNodeFlags(ci, node->flags); + + /* Slave of...
or just "-" */ + ci = sdscatlen(ci," ",1); + if (node->slaveof) + ci = sdscatlen(ci,node->slaveof->name,CLUSTER_NAMELEN); + else + ci = sdscatlen(ci,"-",1); + + unsigned long long nodeEpoch = node->configEpoch; + if (nodeIsSlave(node) && node->slaveof) { + nodeEpoch = node->slaveof->configEpoch; + } + /* Latency from the POV of this node, config epoch, link status */ + ci = sdscatfmt(ci," %I %I %U %s", + (long long) node->ping_sent, + (long long) node->pong_received, + nodeEpoch, + (node->link || node->flags & CLUSTER_NODE_MYSELF) ? + "connected" : "disconnected"); + + /* Slots served by this instance. If we already have slots info, + * append it directly, otherwise, generate slots only if it has. */ + if (node->slot_info_pairs) { + ci = representSlotInfo(ci, node->slot_info_pairs, node->slot_info_pairs_count); + } else if (node->numslots > 0) { + start = -1; + for (j = 0; j < CLUSTER_SLOTS; j++) { + int bit; + + if ((bit = clusterNodeCoversSlot(node, j)) != 0) { + if (start == -1) start = j; + } + if (start != -1 && (!bit || j == CLUSTER_SLOTS-1)) { + if (bit && j == CLUSTER_SLOTS-1) j++; + + if (start == j-1) { + ci = sdscatfmt(ci," %i",start); + } else { + ci = sdscatfmt(ci," %i-%i",start,j-1); + } + start = -1; + } + } + } + + /* Just for MYSELF node we also dump info about slots that + * we are migrating to other instances or importing from other + * instances. */ + if (node->flags & CLUSTER_NODE_MYSELF) { + for (j = 0; j < CLUSTER_SLOTS; j++) { + if (server.cluster->migrating_slots_to[j]) { + ci = sdscatprintf(ci," [%d->-%.40s]",j, + server.cluster->migrating_slots_to[j]->name); + } else if (server.cluster->importing_slots_from[j]) { + ci = sdscatprintf(ci," [%d-<-%.40s]",j, + server.cluster->importing_slots_from[j]->name); + } + } + } + return ci; +} + +/* Generate the slot topology for all nodes and store the string representation + * in the slots_info struct on the node.
 * NOTE(review): clusterGenNodesSlotsInfo() below iterates to i == CLUSTER_SLOTS
 * on purpose, so the final open range is flushed; the serverAssert guards the
 * 2*numslots pair-buffer capacity.
This is used to improve the efficiency + * of clusterGenNodesDescription() because it removes looping of the slot space + * for generating the slot info for each node individually. */ +void clusterGenNodesSlotsInfo(int filter) { + clusterNode *n = NULL; + int start = -1; + + for (int i = 0; i <= CLUSTER_SLOTS; i++) { + /* Find start node and slot id. */ + if (n == NULL) { + if (i == CLUSTER_SLOTS) break; + n = server.cluster->slots[i]; + start = i; + continue; + } + + /* Generate slots info when occur different node with start + * or end of slot. */ + if (i == CLUSTER_SLOTS || n != server.cluster->slots[i]) { + if (!(n->flags & filter)) { + if (!n->slot_info_pairs) { + n->slot_info_pairs = zmalloc(2 * n->numslots * sizeof(uint16_t)); + } + serverAssert((n->slot_info_pairs_count + 1) < (2 * n->numslots)); + n->slot_info_pairs[n->slot_info_pairs_count++] = start; + n->slot_info_pairs[n->slot_info_pairs_count++] = i-1; + } + if (i == CLUSTER_SLOTS) break; + n = server.cluster->slots[i]; + start = i; + } + } +} + +void clusterFreeNodesSlotsInfo(clusterNode *n) { + zfree(n->slot_info_pairs); + n->slot_info_pairs = NULL; + n->slot_info_pairs_count = 0; +} + +/* Generate a csv-alike representation of the nodes we are aware of, + * including the "myself" node, and return an SDS string containing the + * representation (it is up to the caller to free it). + * + * All the nodes matching at least one of the node flags specified in + * "filter" are excluded from the output, so using zero as a filter will + * include all the known nodes in the representation, including nodes in + * the HANDSHAKE state. + * + * Setting tls_primary to 1 to put TLS port in the main : + * field and put TCP port in aux field, instead of the opposite way. + * + * The representation obtained using this function is used for the output + * of the CLUSTER NODES function, and as format for the cluster + * configuration file (nodes.conf) for a given node.
*/ +sds clusterGenNodesDescription(client *c, int filter, int tls_primary) { + sds ci = sdsempty(), ni; + dictIterator *di; + dictEntry *de; + + /* Generate all nodes slots info firstly. */ + clusterGenNodesSlotsInfo(filter); + + di = dictGetSafeIterator(server.cluster->nodes); + while((de = dictNext(di)) != NULL) { + clusterNode *node = dictGetVal(de); + + if (node->flags & filter) continue; + ni = clusterGenNodeDescription(c, node, tls_primary); + ci = sdscatsds(ci,ni); + sdsfree(ni); + ci = sdscatlen(ci,"\n",1); + + /* Release slots info. */ + clusterFreeNodesSlotsInfo(node); + } + dictReleaseIterator(di); + return ci; +} + +/* Add to the output buffer of the given client the description of the given cluster link. + * The description is a map with each entry being an attribute of the link. */ +void addReplyClusterLinkDescription(client *c, clusterLink *link) { + addReplyMapLen(c, 6); + + addReplyBulkCString(c, "direction"); + addReplyBulkCString(c, link->inbound ? "from" : "to"); + + /* addReplyClusterLinkDescription is only called for links that have been + * associated with nodes. The association is always bi-directional, so + * in addReplyClusterLinkDescription, link->node should never be NULL. 
*/ + serverAssert(link->node); + sds node_name = sdsnewlen(link->node->name, CLUSTER_NAMELEN); + addReplyBulkCString(c, "node"); + addReplyBulkCString(c, node_name); + sdsfree(node_name); + + addReplyBulkCString(c, "create-time"); + addReplyLongLong(c, link->ctime); + + char events[3], *p; + p = events; + if (link->conn) { + if (connHasReadHandler(link->conn)) *p++ = 'r'; + if (connHasWriteHandler(link->conn)) *p++ = 'w'; + } + *p = '\0'; + addReplyBulkCString(c, "events"); + addReplyBulkCString(c, events); + + addReplyBulkCString(c, "send-buffer-allocated"); + addReplyLongLong(c, link->send_msg_queue_mem); + + addReplyBulkCString(c, "send-buffer-used"); + addReplyLongLong(c, link->send_msg_queue_mem); +} + +/* Add to the output buffer of the given client an array of cluster link descriptions, + * with array entry being a description of a single current cluster link. */ +void addReplyClusterLinksDescription(client *c) { + dictIterator *di; + dictEntry *de; + void *arraylen_ptr = NULL; + int num_links = 0; + + arraylen_ptr = addReplyDeferredLen(c); + + di = dictGetSafeIterator(server.cluster->nodes); + while((de = dictNext(di)) != NULL) { + clusterNode *node = dictGetVal(de); + if (node->link) { + num_links++; + addReplyClusterLinkDescription(c, node->link); + } + if (node->inbound_link) { + num_links++; + addReplyClusterLinkDescription(c, node->inbound_link); + } + } + dictReleaseIterator(di); + + setDeferredArrayLen(c, arraylen_ptr, num_links); +} + +/* ----------------------------------------------------------------------------- + * CLUSTER command + * -------------------------------------------------------------------------- */ + +const char *clusterGetMessageTypeString(int type) { + switch(type) { + case CLUSTERMSG_TYPE_PING: return "ping"; + case CLUSTERMSG_TYPE_PONG: return "pong"; + case CLUSTERMSG_TYPE_MEET: return "meet"; + case CLUSTERMSG_TYPE_FAIL: return "fail"; + case CLUSTERMSG_TYPE_PUBLISH: return "publish"; + case CLUSTERMSG_TYPE_PUBLISHSHARD: 
return "publishshard"; + case CLUSTERMSG_TYPE_FAILOVER_AUTH_REQUEST: return "auth-req"; + case CLUSTERMSG_TYPE_FAILOVER_AUTH_ACK: return "auth-ack"; + case CLUSTERMSG_TYPE_UPDATE: return "update"; + case CLUSTERMSG_TYPE_MFSTART: return "mfstart"; + case CLUSTERMSG_TYPE_MODULE: return "module"; + } + return "unknown"; +} + +int getSlotOrReply(client *c, robj *o) { + long long slot; + + if (getLongLongFromObject(o,&slot) != C_OK || + slot < 0 || slot >= CLUSTER_SLOTS) + { + addReplyError(c,"Invalid or out of range slot"); + return -1; + } + return (int) slot; +} + +int checkSlotAssignmentsOrReply(client *c, unsigned char *slots, int del, int start_slot, int end_slot) { + int slot; + for (slot = start_slot; slot <= end_slot; slot++) { + if (del && server.cluster->slots[slot] == NULL) { + addReplyErrorFormat(c,"Slot %d is already unassigned", slot); + return C_ERR; + } else if (!del && server.cluster->slots[slot]) { + addReplyErrorFormat(c,"Slot %d is already busy", slot); + return C_ERR; + } + if (slots[slot]++ == 1) { + addReplyErrorFormat(c,"Slot %d specified multiple times",(int)slot); + return C_ERR; + } + } + return C_OK; +} + +void clusterUpdateSlots(client *c, unsigned char *slots, int del) { + int j; + for (j = 0; j < CLUSTER_SLOTS; j++) { + if (slots[j]) { + int retval; + + /* If this slot was set as importing we can clear this + * state as now we are the real owner of the slot. */ + if (server.cluster->importing_slots_from[j]) + server.cluster->importing_slots_from[j] = NULL; + + retval = del ? clusterDelSlot(j) : + clusterAddSlot(myself,j); + serverAssertWithInfo(c,NULL,retval == C_OK); + } + } +} + +/* Add detailed information of a node to the output buffer of the given client. 
*/ +void addNodeDetailsToShardReply(client *c, clusterNode *node) { + int reply_count = 0; + void *node_replylen = addReplyDeferredLen(c); + addReplyBulkCString(c, "id"); + addReplyBulkCBuffer(c, node->name, CLUSTER_NAMELEN); + reply_count++; + + if (node->tcp_port) { + addReplyBulkCString(c, "port"); + addReplyLongLong(c, node->tcp_port); + reply_count++; + } + + if (node->tls_port) { + addReplyBulkCString(c, "tls-port"); + addReplyLongLong(c, node->tls_port); + reply_count++; + } + + addReplyBulkCString(c, "ip"); + addReplyBulkCString(c, node->ip); + reply_count++; + + addReplyBulkCString(c, "endpoint"); + addReplyBulkCString(c, clusterNodePreferredEndpoint(node)); + reply_count++; + + if (sdslen(node->hostname) != 0) { + addReplyBulkCString(c, "hostname"); + addReplyBulkCBuffer(c, node->hostname, sdslen(node->hostname)); + reply_count++; + } + + long long node_offset; + if (node->flags & CLUSTER_NODE_MYSELF) { + node_offset = nodeIsSlave(node) ? replicationGetSlaveOffset() : server.master_repl_offset; + } else { + node_offset = node->repl_offset; + } + + addReplyBulkCString(c, "role"); + addReplyBulkCString(c, nodeIsSlave(node) ? "replica" : "master"); + reply_count++; + + addReplyBulkCString(c, "replication-offset"); + addReplyLongLong(c, node_offset); + reply_count++; + + addReplyBulkCString(c, "health"); + const char *health_msg = NULL; + if (nodeFailed(node)) { + health_msg = "fail"; + } else if (nodeIsSlave(node) && node_offset == 0) { + health_msg = "loading"; + } else { + health_msg = "online"; + } + addReplyBulkCString(c, health_msg); + reply_count++; + + setDeferredMapLen(c, node_replylen, reply_count); +} + +/* Add the shard reply of a single shard based off the given primary node. 
*/ +void addShardReplyForClusterShards(client *c, list *nodes) { + serverAssert(listLength(nodes) > 0); + clusterNode *n = listNodeValue(listFirst(nodes)); + addReplyMapLen(c, 2); + addReplyBulkCString(c, "slots"); + + /* Use slot_info_pairs from the primary only */ + n = clusterNodeGetMaster(n); + + if (n->slot_info_pairs != NULL) { + serverAssert((n->slot_info_pairs_count % 2) == 0); + addReplyArrayLen(c, n->slot_info_pairs_count); + for (int i = 0; i < n->slot_info_pairs_count; i++) + addReplyLongLong(c, (unsigned long)n->slot_info_pairs[i]); + } else { + /* If no slot info pair is provided, the node owns no slots */ + addReplyArrayLen(c, 0); + } + + addReplyBulkCString(c, "nodes"); + addReplyArrayLen(c, listLength(nodes)); + listIter li; + listRewind(nodes, &li); + for (listNode *ln = listNext(&li); ln != NULL; ln = listNext(&li)) { + clusterNode *n = listNodeValue(ln); + addNodeDetailsToShardReply(c, n); + clusterFreeNodesSlotsInfo(n); + } +} + +/* Add to the output buffer of the given client, an array of slot (start, end) + * pair owned by the shard, also the primary and set of replica(s) along with + * information about each node. 
 */
void clusterCommandShards(client *c) {
    /* CLUSTER SHARDS: one reply element per shard. */
    addReplyArrayLen(c, dictSize(server.cluster->shards));
    /* This call will add slot_info_pairs to all nodes */
    clusterGenNodesSlotsInfo(0);
    dictIterator *di = dictGetSafeIterator(server.cluster->shards);
    for(dictEntry *de = dictNext(di); de != NULL; de = dictNext(di)) {
        addShardReplyForClusterShards(c, dictGetVal(de));
    }
    dictReleaseIterator(di);
}

/* Build and return (as a new sds string, owned by the caller) the body of the
 * CLUSTER INFO reply: state, slot health counters, epochs and per-message-type
 * cluster bus statistics. */
sds genClusterInfoString(void) {
    sds info = sdsempty();
    char *statestr[] = {"ok","fail"};
    int slots_assigned = 0, slots_ok = 0, slots_pfail = 0, slots_fail = 0;
    uint64_t myepoch;
    int j;

    /* Classify every assigned slot by the health of its owner. */
    for (j = 0; j < CLUSTER_SLOTS; j++) {
        clusterNode *n = server.cluster->slots[j];

        if (n == NULL) continue;
        slots_assigned++;
        if (nodeFailed(n)) {
            slots_fail++;
        } else if (nodeTimedOut(n)) {
            slots_pfail++;
        } else {
            slots_ok++;
        }
    }

    /* A replica reports its master's config epoch. */
    myepoch = (nodeIsSlave(myself) && myself->slaveof) ?
              myself->slaveof->configEpoch : myself->configEpoch;

    info = sdscatprintf(info,
        "cluster_state:%s\r\n"
        "cluster_slots_assigned:%d\r\n"
        "cluster_slots_ok:%d\r\n"
        "cluster_slots_pfail:%d\r\n"
        "cluster_slots_fail:%d\r\n"
        "cluster_known_nodes:%lu\r\n"
        "cluster_size:%d\r\n"
        "cluster_current_epoch:%llu\r\n"
        "cluster_my_epoch:%llu\r\n"
        , statestr[server.cluster->state],
        slots_assigned,
        slots_ok,
        slots_pfail,
        slots_fail,
        dictSize(server.cluster->nodes),
        server.cluster->size,
        (unsigned long long) server.cluster->currentEpoch,
        (unsigned long long) myepoch
    );

    /* Show stats about messages sent and received. */
    long long tot_msg_sent = 0;
    long long tot_msg_received = 0;

    /* Only message types with a nonzero counter get their own line. */
    for (int i = 0; i < CLUSTERMSG_TYPE_COUNT; i++) {
        if (server.cluster->stats_bus_messages_sent[i] == 0) continue;
        tot_msg_sent += server.cluster->stats_bus_messages_sent[i];
        info = sdscatprintf(info,
            "cluster_stats_messages_%s_sent:%lld\r\n",
            clusterGetMessageTypeString(i),
            server.cluster->stats_bus_messages_sent[i]);
    }
    info = sdscatprintf(info,
        "cluster_stats_messages_sent:%lld\r\n", tot_msg_sent);

    for (int i = 0; i < CLUSTERMSG_TYPE_COUNT; i++) {
        if (server.cluster->stats_bus_messages_received[i] == 0) continue;
        tot_msg_received += server.cluster->stats_bus_messages_received[i];
        info = sdscatprintf(info,
            "cluster_stats_messages_%s_received:%lld\r\n",
            clusterGetMessageTypeString(i),
            server.cluster->stats_bus_messages_received[i]);
    }
    info = sdscatprintf(info,
        "cluster_stats_messages_received:%lld\r\n", tot_msg_received);

    info = sdscatprintf(info,
        "total_cluster_links_buffer_limit_exceeded:%llu\r\n",
        server.cluster->stat_cluster_links_buffer_limit_exceeded);

    return info;
}


/* Unsubscribe all shard channels that hash to 'slot'; no-op when the slot
 * has no channels. */
void removeChannelsInSlot(unsigned int slot) {
    if (countChannelsInSlot(slot) == 0) return;

    pubsubShardUnsubscribeAllChannelsInSlot(slot);
}

/* Remove all the keys in the specified hash slot.
 * The number of removed items is returned.
 */
unsigned int delKeysInSlot(unsigned int hashslot) {
    /* Fast path: nothing stored under this slot. */
    if (!kvstoreDictSize(server.db->keys, hashslot))
        return 0;

    unsigned int j = 0;

    kvstoreDictIterator *kvs_di = NULL;
    dictEntry *de = NULL;
    /* Safe iterator: entries are deleted while iterating. */
    kvs_di = kvstoreGetDictSafeIterator(server.db->keys, hashslot);
    while((de = kvstoreDictIteratorNext(kvs_di)) != NULL) {
        enterExecutionUnit(1, 0);
        sds sdskey = dictGetKey(de);
        /* Copy the key into an robj: dbDelete frees the stored key, so the
         * original sds must not be used after the deletion. */
        robj *key = createStringObject(sdskey, sdslen(sdskey));
        dbDelete(&server.db[0], key);
        propagateDeletion(&server.db[0], key, server.lazyfree_lazy_server_del);
        signalModifiedKey(NULL, &server.db[0], key);
        /* The keys are not actually logically deleted from the database, just moved to another node.
         * The modules needs to know that these keys are no longer available locally, so just send the
         * keyspace notification to the modules, but not to clients. */
        moduleNotifyKeyspaceEvent(NOTIFY_GENERIC, "del", key, server.db[0].id);
        exitExecutionUnit();
        postExecutionUnitOperations();
        decrRefCount(key);
        j++;
        server.dirty++;
    }
    kvstoreReleaseDictIterator(kvs_di);

    return j;
}

/* Get the count of the channels for a given slot.
*/ +unsigned int countChannelsInSlot(unsigned int hashslot) { + return kvstoreDictSize(server.pubsubshard_channels, hashslot); +} + +int clusterNodeIsMyself(clusterNode *n) { + return n == server.cluster->myself; +} + +clusterNode *getMyClusterNode(void) { + return server.cluster->myself; +} + +int clusterManualFailoverTimeLimit(void) { + return server.cluster->mf_end; +} + +int getClusterSize(void) { + return dictSize(server.cluster->nodes); +} + +int getMyShardSlotCount(void) { + if (!nodeIsSlave(server.cluster->myself)) { + return server.cluster->myself->numslots; + } else if (server.cluster->myself->slaveof) { + return server.cluster->myself->slaveof->numslots; + } else { + return 0; + } +} + +char **getClusterNodesList(size_t *numnodes) { + size_t count = dictSize(server.cluster->nodes); + char **ids = zmalloc((count+1)*CLUSTER_NAMELEN); + dictIterator *di = dictGetIterator(server.cluster->nodes); + dictEntry *de; + int j = 0; + while((de = dictNext(di)) != NULL) { + clusterNode *node = dictGetVal(de); + if (node->flags & (CLUSTER_NODE_NOADDR|CLUSTER_NODE_HANDSHAKE)) continue; + ids[j] = zmalloc(CLUSTER_NAMELEN); + memcpy(ids[j],node->name,CLUSTER_NAMELEN); + j++; + } + *numnodes = j; + ids[j] = NULL; /* Null term so that FreeClusterNodesList does not need + * to also get the count argument. */ + dictReleaseIterator(di); + return ids; +} + +int clusterNodeIsMaster(clusterNode *n) { + return n->flags & CLUSTER_NODE_MASTER; +} + +int handleDebugClusterCommand(client *c) { + if (strcasecmp(c->argv[1]->ptr, "CLUSTERLINK") || + strcasecmp(c->argv[2]->ptr, "KILL") || + c->argc != 5) { + return 0; + } + + if (!server.cluster_enabled) { + addReplyError(c, "Debug option only available for cluster mode enabled setup!"); + return 1; + } + + /* Find the node. 
*/ + clusterNode *n = clusterLookupNode(c->argv[4]->ptr, sdslen(c->argv[4]->ptr)); + if (!n) { + addReplyErrorFormat(c, "Unknown node %s", (char *) c->argv[4]->ptr); + return 1; + } + + /* Terminate the link based on the direction or all. */ + if (!strcasecmp(c->argv[3]->ptr, "from")) { + if (n->inbound_link) freeClusterLink(n->inbound_link); + } else if (!strcasecmp(c->argv[3]->ptr, "to")) { + if (n->link) freeClusterLink(n->link); + } else if (!strcasecmp(c->argv[3]->ptr, "all")) { + if (n->link) freeClusterLink(n->link); + if (n->inbound_link) freeClusterLink(n->inbound_link); + } else { + addReplyErrorFormat(c, "Unknown direction %s", (char *) c->argv[3]->ptr); + } + addReply(c, shared.ok); + + return 1; +} + +int clusterNodePending(clusterNode *node) { + return node->flags & (CLUSTER_NODE_NOADDR|CLUSTER_NODE_HANDSHAKE); +} + +char *clusterNodeIp(clusterNode *node) { + return node->ip; +} + +int clusterNodeIsSlave(clusterNode *node) { + return node->flags & CLUSTER_NODE_SLAVE; +} + +clusterNode *clusterNodeGetSlaveof(clusterNode *node) { + return node->slaveof; +} + +clusterNode *clusterNodeGetMaster(clusterNode *node) { + while (node->slaveof != NULL) node = node->slaveof; + return node; +} + +char *clusterNodeGetName(clusterNode *node) { + return node->name; +} + +int clusterNodeTimedOut(clusterNode *node) { + return nodeTimedOut(node); +} + +int clusterNodeIsFailing(clusterNode *node) { + return nodeFailed(node); +} + +int clusterNodeIsNoFailover(clusterNode *node) { + return node->flags & CLUSTER_NODE_NOFAILOVER; +} + +const char **clusterDebugCommandExtendedHelp(void) { + static const char *help[] = { + "CLUSTERLINK KILL ", + " Kills the link based on the direction to/from (both) with the provided node.", + NULL + }; + + return help; +} + +char *clusterNodeGetShardId(clusterNode *node) { + return node->shard_id; +} + +int clusterCommandSpecial(client *c) { + if (!strcasecmp(c->argv[1]->ptr,"meet") && (c->argc == 4 || c->argc == 5)) { + /* CLUSTER MEET 
[cport] */ + long long port, cport; + + if (getLongLongFromObject(c->argv[3], &port) != C_OK) { + addReplyErrorFormat(c,"Invalid base port specified: %s", + (char*)c->argv[3]->ptr); + return 1; + } + + if (c->argc == 5) { + if (getLongLongFromObject(c->argv[4], &cport) != C_OK) { + addReplyErrorFormat(c,"Invalid bus port specified: %s", + (char*)c->argv[4]->ptr); + return 1; + } + } else { + cport = port + CLUSTER_PORT_INCR; + } + + if (clusterStartHandshake(c->argv[2]->ptr,port,cport) == 0 && + errno == EINVAL) + { + addReplyErrorFormat(c,"Invalid node address specified: %s:%s", + (char*)c->argv[2]->ptr, (char*)c->argv[3]->ptr); + } else { + addReply(c,shared.ok); + } + } else if (!strcasecmp(c->argv[1]->ptr,"flushslots") && c->argc == 2) { + /* CLUSTER FLUSHSLOTS */ + if (kvstoreSize(server.db[0].keys) != 0) { + addReplyError(c,"DB must be empty to perform CLUSTER FLUSHSLOTS."); + return 1; + } + clusterDelNodeSlots(myself); + clusterDoBeforeSleep(CLUSTER_TODO_UPDATE_STATE|CLUSTER_TODO_SAVE_CONFIG); + addReply(c,shared.ok); + } else if ((!strcasecmp(c->argv[1]->ptr,"addslots") || + !strcasecmp(c->argv[1]->ptr,"delslots")) && c->argc >= 3) { + /* CLUSTER ADDSLOTS [slot] ... */ + /* CLUSTER DELSLOTS [slot] ... */ + int j, slot; + unsigned char *slots = zmalloc(CLUSTER_SLOTS); + int del = !strcasecmp(c->argv[1]->ptr,"delslots"); + + memset(slots,0,CLUSTER_SLOTS); + /* Check that all the arguments are parseable.*/ + for (j = 2; j < c->argc; j++) { + if ((slot = getSlotOrReply(c,c->argv[j])) == C_ERR) { + zfree(slots); + return 1; + } + } + /* Check that the slots are not already busy. 
*/ + for (j = 2; j < c->argc; j++) { + slot = getSlotOrReply(c,c->argv[j]); + if (checkSlotAssignmentsOrReply(c, slots, del, slot, slot) == C_ERR) { + zfree(slots); + return 1; + } + } + clusterUpdateSlots(c, slots, del); + zfree(slots); + clusterDoBeforeSleep(CLUSTER_TODO_UPDATE_STATE|CLUSTER_TODO_SAVE_CONFIG); + addReply(c,shared.ok); + } else if ((!strcasecmp(c->argv[1]->ptr,"addslotsrange") || + !strcasecmp(c->argv[1]->ptr,"delslotsrange")) && c->argc >= 4) { + if (c->argc % 2 == 1) { + addReplyErrorArity(c); + return 1; + } + /* CLUSTER ADDSLOTSRANGE [ ...] */ + /* CLUSTER DELSLOTSRANGE [ ...] */ + int j, startslot, endslot; + unsigned char *slots = zmalloc(CLUSTER_SLOTS); + int del = !strcasecmp(c->argv[1]->ptr,"delslotsrange"); + + memset(slots,0,CLUSTER_SLOTS); + /* Check that all the arguments are parseable and that all the + * slots are not already busy. */ + for (j = 2; j < c->argc; j += 2) { + if ((startslot = getSlotOrReply(c,c->argv[j])) == C_ERR) { + zfree(slots); + return 1; + } + if ((endslot = getSlotOrReply(c,c->argv[j+1])) == C_ERR) { + zfree(slots); + return 1; + } + if (startslot > endslot) { + addReplyErrorFormat(c,"start slot number %d is greater than end slot number %d", startslot, endslot); + zfree(slots); + return 1; + } + + if (checkSlotAssignmentsOrReply(c, slots, del, startslot, endslot) == C_ERR) { + zfree(slots); + return 1; + } + } + clusterUpdateSlots(c, slots, del); + zfree(slots); + clusterDoBeforeSleep(CLUSTER_TODO_UPDATE_STATE|CLUSTER_TODO_SAVE_CONFIG); + addReply(c,shared.ok); + } else if (!strcasecmp(c->argv[1]->ptr,"setslot") && c->argc >= 4) { + /* SETSLOT 10 MIGRATING */ + /* SETSLOT 10 IMPORTING */ + /* SETSLOT 10 STABLE */ + /* SETSLOT 10 NODE */ + int slot; + clusterNode *n; + + if (nodeIsSlave(myself)) { + addReplyError(c,"Please use SETSLOT only with masters."); + return 1; + } + + if ((slot = getSlotOrReply(c, c->argv[2])) == -1) return 1; + + if (!strcasecmp(c->argv[3]->ptr,"migrating") && c->argc == 5) { + if 
(server.cluster->slots[slot] != myself) { + addReplyErrorFormat(c,"I'm not the owner of hash slot %u",slot); + return 1; + } + n = clusterLookupNode(c->argv[4]->ptr, sdslen(c->argv[4]->ptr)); + if (n == NULL) { + addReplyErrorFormat(c,"I don't know about node %s", + (char*)c->argv[4]->ptr); + return 1; + } + if (nodeIsSlave(n)) { + addReplyError(c,"Target node is not a master"); + return 1; + } + server.cluster->migrating_slots_to[slot] = n; + } else if (!strcasecmp(c->argv[3]->ptr,"importing") && c->argc == 5) { + if (server.cluster->slots[slot] == myself) { + addReplyErrorFormat(c, + "I'm already the owner of hash slot %u",slot); + return 1; + } + n = clusterLookupNode(c->argv[4]->ptr, sdslen(c->argv[4]->ptr)); + if (n == NULL) { + addReplyErrorFormat(c,"I don't know about node %s", + (char*)c->argv[4]->ptr); + return 1; + } + if (nodeIsSlave(n)) { + addReplyError(c,"Target node is not a master"); + return 1; + } + server.cluster->importing_slots_from[slot] = n; + } else if (!strcasecmp(c->argv[3]->ptr,"stable") && c->argc == 4) { + /* CLUSTER SETSLOT STABLE */ + server.cluster->importing_slots_from[slot] = NULL; + server.cluster->migrating_slots_to[slot] = NULL; + } else if (!strcasecmp(c->argv[3]->ptr,"node") && c->argc == 5) { + /* CLUSTER SETSLOT NODE */ + n = clusterLookupNode(c->argv[4]->ptr, sdslen(c->argv[4]->ptr)); + if (!n) { + addReplyErrorFormat(c,"Unknown node %s", + (char*)c->argv[4]->ptr); + return 1; + } + if (nodeIsSlave(n)) { + addReplyError(c,"Target node is not a master"); + return 1; + } + /* If this hash slot was served by 'myself' before to switch + * make sure there are no longer local keys for this hash slot. 
*/ + if (server.cluster->slots[slot] == myself && n != myself) { + if (countKeysInSlot(slot) != 0) { + addReplyErrorFormat(c, + "Can't assign hashslot %d to a different node " + "while I still hold keys for this hash slot.", slot); + return 1; + } + } + /* If this slot is in migrating status but we have no keys + * for it assigning the slot to another node will clear + * the migrating status. */ + if (countKeysInSlot(slot) == 0 && + server.cluster->migrating_slots_to[slot]) + server.cluster->migrating_slots_to[slot] = NULL; + + int slot_was_mine = server.cluster->slots[slot] == myself; + clusterDelSlot(slot); + clusterAddSlot(n,slot); + + /* If we are a master left without slots, we should turn into a + * replica of the new master. */ + if (slot_was_mine && + n != myself && + myself->numslots == 0 && + server.cluster_allow_replica_migration) { + serverLog(LL_NOTICE, + "Configuration change detected. Reconfiguring myself " + "as a replica of %.40s (%s)", n->name, n->human_nodename); + clusterSetMaster(n); + clusterDoBeforeSleep(CLUSTER_TODO_SAVE_CONFIG | + CLUSTER_TODO_UPDATE_STATE | + CLUSTER_TODO_FSYNC_CONFIG); + } + + /* If this node was importing this slot, assigning the slot to + * itself also clears the importing status. */ + if (n == myself && + server.cluster->importing_slots_from[slot]) { + /* This slot was manually migrated, set this node configEpoch + * to a new epoch so that the new version can be propagated + * by the cluster. + * + * Note that if this ever results in a collision with another + * node getting the same configEpoch, for example because a + * failover happens at the same time we close the slot, the + * configEpoch collision resolution will fix it assigning + * a different epoch to each node. 
*/ + if (clusterBumpConfigEpochWithoutConsensus() == C_OK) { + serverLog(LL_NOTICE, + "configEpoch updated after importing slot %d", slot); + } + server.cluster->importing_slots_from[slot] = NULL; + /* After importing this slot, let the other nodes know as + * soon as possible. */ + clusterBroadcastPong(CLUSTER_BROADCAST_ALL); + } + } else { + addReplyError(c, + "Invalid CLUSTER SETSLOT action or number of arguments. Try CLUSTER HELP"); + return 1; + } + clusterDoBeforeSleep(CLUSTER_TODO_SAVE_CONFIG|CLUSTER_TODO_UPDATE_STATE); + addReply(c,shared.ok); + } else if (!strcasecmp(c->argv[1]->ptr,"bumpepoch") && c->argc == 2) { + /* CLUSTER BUMPEPOCH */ + int retval = clusterBumpConfigEpochWithoutConsensus(); + sds reply = sdscatprintf(sdsempty(),"+%s %llu\r\n", + (retval == C_OK) ? "BUMPED" : "STILL", + (unsigned long long) myself->configEpoch); + addReplySds(c,reply); + } else if (!strcasecmp(c->argv[1]->ptr,"saveconfig") && c->argc == 2) { + int retval = clusterSaveConfig(1); + + if (retval == 0) + addReply(c,shared.ok); + else + addReplyErrorFormat(c,"error saving the cluster node config: %s", + strerror(errno)); + } else if (!strcasecmp(c->argv[1]->ptr,"forget") && c->argc == 3) { + /* CLUSTER FORGET */ + clusterNode *n = clusterLookupNode(c->argv[2]->ptr, sdslen(c->argv[2]->ptr)); + if (!n) { + if (clusterBlacklistExists((char*)c->argv[2]->ptr)) + /* Already forgotten. The deletion may have been gossipped by + * another node, so we pretend it succeeded. 
*/ + addReply(c,shared.ok); + else + addReplyErrorFormat(c,"Unknown node %s", (char*)c->argv[2]->ptr); + return 1; + } else if (n == myself) { + addReplyError(c,"I tried hard but I can't forget myself..."); + return 1; + } else if (nodeIsSlave(myself) && myself->slaveof == n) { + addReplyError(c,"Can't forget my master!"); + return 1; + } + clusterBlacklistAddNode(n); + clusterDelNode(n); + clusterDoBeforeSleep(CLUSTER_TODO_UPDATE_STATE| + CLUSTER_TODO_SAVE_CONFIG); + addReply(c,shared.ok); + } else if (!strcasecmp(c->argv[1]->ptr,"replicate") && c->argc == 3) { + /* CLUSTER REPLICATE */ + /* Lookup the specified node in our table. */ + clusterNode *n = clusterLookupNode(c->argv[2]->ptr, sdslen(c->argv[2]->ptr)); + if (!n) { + addReplyErrorFormat(c,"Unknown node %s", (char*)c->argv[2]->ptr); + return 1; + } + + /* I can't replicate myself. */ + if (n == myself) { + addReplyError(c,"Can't replicate myself"); + return 1; + } + + /* Can't replicate a slave. */ + if (nodeIsSlave(n)) { + addReplyError(c,"I can only replicate a master, not a replica."); + return 1; + } + + /* If the instance is currently a master, it should have no assigned + * slots nor keys to accept to replicate some other node. + * Slaves can switch to another master without issues. */ + if (clusterNodeIsMaster(myself) && + (myself->numslots != 0 || kvstoreSize(server.db[0].keys) != 0)) { + addReplyError(c, + "To set a master the node must be empty and " + "without assigned slots."); + return 1; + } + + /* Set the master. 
*/ + clusterSetMaster(n); + clusterDoBeforeSleep(CLUSTER_TODO_UPDATE_STATE|CLUSTER_TODO_SAVE_CONFIG); + addReply(c,shared.ok); + } else if (!strcasecmp(c->argv[1]->ptr,"count-failure-reports") && + c->argc == 3) + { + /* CLUSTER COUNT-FAILURE-REPORTS */ + clusterNode *n = clusterLookupNode(c->argv[2]->ptr, sdslen(c->argv[2]->ptr)); + + if (!n) { + addReplyErrorFormat(c,"Unknown node %s", (char*)c->argv[2]->ptr); + return 1; + } else { + addReplyLongLong(c,clusterNodeFailureReportsCount(n)); + } + } else if (!strcasecmp(c->argv[1]->ptr,"failover") && + (c->argc == 2 || c->argc == 3)) + { + /* CLUSTER FAILOVER [FORCE|TAKEOVER] */ + int force = 0, takeover = 0; + + if (c->argc == 3) { + if (!strcasecmp(c->argv[2]->ptr,"force")) { + force = 1; + } else if (!strcasecmp(c->argv[2]->ptr,"takeover")) { + takeover = 1; + force = 1; /* Takeover also implies force. */ + } else { + addReplyErrorObject(c,shared.syntaxerr); + return 1; + } + } + + /* Check preconditions. */ + if (clusterNodeIsMaster(myself)) { + addReplyError(c,"You should send CLUSTER FAILOVER to a replica"); + return 1; + } else if (myself->slaveof == NULL) { + addReplyError(c,"I'm a replica but my master is unknown to me"); + return 1; + } else if (!force && + (nodeFailed(myself->slaveof) || + myself->slaveof->link == NULL)) + { + addReplyError(c,"Master is down or failed, " + "please use CLUSTER FAILOVER FORCE"); + return 1; + } + resetManualFailover(); + server.cluster->mf_end = mstime() + CLUSTER_MF_TIMEOUT; + + if (takeover) { + /* A takeover does not perform any initial check. It just + * generates a new configuration epoch for this node without + * consensus, claims the master's slots, and broadcast the new + * configuration. */ + serverLog(LL_NOTICE,"Taking over the master (user request)."); + clusterBumpConfigEpochWithoutConsensus(); + clusterFailoverReplaceYourMaster(); + } else if (force) { + /* If this is a forced failover, we don't need to talk with our + * master to agree about the offset. 
We just failover taking over + * it without coordination. */ + serverLog(LL_NOTICE,"Forced failover user request accepted."); + server.cluster->mf_can_start = 1; + } else { + serverLog(LL_NOTICE,"Manual failover user request accepted."); + clusterSendMFStart(myself->slaveof); + } + addReply(c,shared.ok); + } else if (!strcasecmp(c->argv[1]->ptr,"set-config-epoch") && c->argc == 3) + { + /* CLUSTER SET-CONFIG-EPOCH + * + * The user is allowed to set the config epoch only when a node is + * totally fresh: no config epoch, no other known node, and so forth. + * This happens at cluster creation time to start with a cluster where + * every node has a different node ID, without to rely on the conflicts + * resolution system which is too slow when a big cluster is created. */ + long long epoch; + + if (getLongLongFromObjectOrReply(c,c->argv[2],&epoch,NULL) != C_OK) + return 1; + + if (epoch < 0) { + addReplyErrorFormat(c,"Invalid config epoch specified: %lld",epoch); + } else if (dictSize(server.cluster->nodes) > 1) { + addReplyError(c,"The user can assign a config epoch only when the " + "node does not know any other node."); + } else if (myself->configEpoch != 0) { + addReplyError(c,"Node config epoch is already non-zero"); + } else { + myself->configEpoch = epoch; + serverLog(LL_NOTICE, + "configEpoch set to %llu via CLUSTER SET-CONFIG-EPOCH", + (unsigned long long) myself->configEpoch); + + if (server.cluster->currentEpoch < (uint64_t)epoch) + server.cluster->currentEpoch = epoch; + /* No need to fsync the config here since in the unlucky event + * of a failure to persist the config, the conflict resolution code + * will assign a unique config to this node. */ + clusterDoBeforeSleep(CLUSTER_TODO_UPDATE_STATE| + CLUSTER_TODO_SAVE_CONFIG); + addReply(c,shared.ok); + } + } else if (!strcasecmp(c->argv[1]->ptr,"reset") && + (c->argc == 2 || c->argc == 3)) + { + /* CLUSTER RESET [SOFT|HARD] */ + int hard = 0; + + /* Parse soft/hard argument. Default is soft. 
*/ + if (c->argc == 3) { + if (!strcasecmp(c->argv[2]->ptr,"hard")) { + hard = 1; + } else if (!strcasecmp(c->argv[2]->ptr,"soft")) { + hard = 0; + } else { + addReplyErrorObject(c,shared.syntaxerr); + return 1; + } + } + + /* Slaves can be reset while containing data, but not master nodes + * that must be empty. */ + if (clusterNodeIsMaster(myself) && kvstoreSize(c->db->keys) != 0) { + addReplyError(c,"CLUSTER RESET can't be called with " + "master nodes containing keys"); + return 1; + } + clusterReset(hard); + addReply(c,shared.ok); + } else if (!strcasecmp(c->argv[1]->ptr,"links") && c->argc == 2) { + /* CLUSTER LINKS */ + addReplyClusterLinksDescription(c); + } else { + return 0; + } + + return 1; +} + +const char **clusterCommandExtendedHelp(void) { + static const char *help[] = { + "ADDSLOTS [ ...]", + " Assign slots to current node.", + "ADDSLOTSRANGE [ ...]", + " Assign slots which are between and to current node.", + "BUMPEPOCH", + " Advance the cluster config epoch.", + "COUNT-FAILURE-REPORTS ", + " Return number of failure reports for .", + "DELSLOTS [ ...]", + " Delete slots information from current node.", + "DELSLOTSRANGE [ ...]", + " Delete slots information which are between and from current node.", + "FAILOVER [FORCE|TAKEOVER]", + " Promote current replica node to being a master.", + "FORGET ", + " Remove a node from the cluster.", + "FLUSHSLOTS", + " Delete current node own slots information.", + "MEET []", + " Connect nodes into a working cluster.", + "REPLICATE ", + " Configure current node as replica to .", + "RESET [HARD|SOFT]", + " Reset current node (default: soft).", + "SET-CONFIG-EPOCH ", + " Set config epoch of current node.", + "SETSLOT (IMPORTING |MIGRATING |STABLE|NODE )", + " Set slot state.", + "SAVECONFIG", + " Force saving cluster configuration on disk.", + "LINKS", + " Return information about all network links between this node and its peers.", + " Output format is an array where each array element is a map containing attributes 
of a link", + NULL + }; + + return help; +} + +int clusterNodeNumSlaves(clusterNode *node) { + return node->numslaves; +} + +clusterNode *clusterNodeGetSlave(clusterNode *node, int slave_idx) { + return node->slaves[slave_idx]; +} + +clusterNode *getMigratingSlotDest(int slot) { + return server.cluster->migrating_slots_to[slot]; +} + +clusterNode *getImportingSlotSource(int slot) { + return server.cluster->importing_slots_from[slot]; +} + +int isClusterHealthy(void) { + return server.cluster->state == CLUSTER_OK; +} + +clusterNode *getNodeBySlot(int slot) { + return server.cluster->slots[slot]; +} + +char *clusterNodeHostname(clusterNode *node) { + return node->hostname; +} + +long long clusterNodeReplOffset(clusterNode *node) { + return node->repl_offset; +} + +const char *clusterNodePreferredEndpoint(clusterNode *n) { + char *hostname = clusterNodeHostname(n); + switch (server.cluster_preferred_endpoint_type) { + case CLUSTER_ENDPOINT_TYPE_IP: + return clusterNodeIp(n); + case CLUSTER_ENDPOINT_TYPE_HOSTNAME: + return (hostname != NULL && hostname[0] != '\0') ? hostname : "?"; + case CLUSTER_ENDPOINT_TYPE_UNKNOWN_ENDPOINT: + return ""; + } + return "unknown"; +} + +int clusterAllowFailoverCmd(client *c) { + if (!server.cluster_enabled) { + return 1; + } + addReplyError(c,"FAILOVER not allowed in cluster mode. " + "Use CLUSTER FAILOVER command instead."); + return 0; +} + +void clusterPromoteSelfToMaster(void) { + replicationUnsetMaster(); +} diff --git a/src/cluster_legacy.h b/src/cluster_legacy.h new file mode 100644 index 00000000000..a857184ab3e --- /dev/null +++ b/src/cluster_legacy.h @@ -0,0 +1,359 @@ +#ifndef CLUSTER_LEGACY_H +#define CLUSTER_LEGACY_H + +#define CLUSTER_PORT_INCR 10000 /* Cluster port = baseport + PORT_INCR */ + +/* The following defines are amount of time, sometimes expressed as + * multiplicators of the node timeout value (when ending with MULT). */ +#define CLUSTER_FAIL_REPORT_VALIDITY_MULT 2 /* Fail report validity. 
*/ +#define CLUSTER_FAIL_UNDO_TIME_MULT 2 /* Undo fail if master is back. */ +#define CLUSTER_MF_TIMEOUT 5000 /* Milliseconds to do a manual failover. */ +#define CLUSTER_MF_PAUSE_MULT 2 /* Master pause manual failover mult. */ +#define CLUSTER_SLAVE_MIGRATION_DELAY 5000 /* Delay for slave migration. */ + +/* Reasons why a slave is not able to failover. */ +#define CLUSTER_CANT_FAILOVER_NONE 0 +#define CLUSTER_CANT_FAILOVER_DATA_AGE 1 +#define CLUSTER_CANT_FAILOVER_WAITING_DELAY 2 +#define CLUSTER_CANT_FAILOVER_EXPIRED 3 +#define CLUSTER_CANT_FAILOVER_WAITING_VOTES 4 +#define CLUSTER_CANT_FAILOVER_RELOG_PERIOD (10) /* seconds. */ + +/* clusterState todo_before_sleep flags. */ +#define CLUSTER_TODO_HANDLE_FAILOVER (1<<0) +#define CLUSTER_TODO_UPDATE_STATE (1<<1) +#define CLUSTER_TODO_SAVE_CONFIG (1<<2) +#define CLUSTER_TODO_FSYNC_CONFIG (1<<3) +#define CLUSTER_TODO_HANDLE_MANUALFAILOVER (1<<4) + +/* clusterLink encapsulates everything needed to talk with a remote node. */ +typedef struct clusterLink { + mstime_t ctime; /* Link creation time */ + connection *conn; /* Connection to remote node */ + list *send_msg_queue; /* List of messages to be sent */ + size_t head_msg_send_offset; /* Number of bytes already sent of message at head of queue */ + unsigned long long send_msg_queue_mem; /* Memory in bytes used by message queue */ + char *rcvbuf; /* Packet reception buffer */ + size_t rcvbuf_len; /* Used size of rcvbuf */ + size_t rcvbuf_alloc; /* Allocated size of rcvbuf */ + clusterNode *node; /* Node related to this link. Initialized to NULL when unknown */ + int inbound; /* 1 if this link is an inbound link accepted from the related node */ +} clusterLink; + +/* Cluster node flags and macros. */ +#define CLUSTER_NODE_MASTER 1 /* The node is a master */ +#define CLUSTER_NODE_SLAVE 2 /* The node is a slave */ +#define CLUSTER_NODE_PFAIL 4 /* Failure? 
Need acknowledge */ +#define CLUSTER_NODE_FAIL 8 /* The node is believed to be malfunctioning */ +#define CLUSTER_NODE_MYSELF 16 /* This node is myself */ +#define CLUSTER_NODE_HANDSHAKE 32 /* We have still to exchange the first ping */ +#define CLUSTER_NODE_NOADDR 64 /* We don't know the address of this node */ +#define CLUSTER_NODE_MEET 128 /* Send a MEET message to this node */ +#define CLUSTER_NODE_MIGRATE_TO 256 /* Master eligible for replica migration. */ +#define CLUSTER_NODE_NOFAILOVER 512 /* Slave will not try to failover. */ +#define CLUSTER_NODE_NULL_NAME "\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000" + +#define nodeIsSlave(n) ((n)->flags & CLUSTER_NODE_SLAVE) +#define nodeInHandshake(n) ((n)->flags & CLUSTER_NODE_HANDSHAKE) +#define nodeHasAddr(n) (!((n)->flags & CLUSTER_NODE_NOADDR)) +#define nodeTimedOut(n) ((n)->flags & CLUSTER_NODE_PFAIL) +#define nodeFailed(n) ((n)->flags & CLUSTER_NODE_FAIL) +#define nodeCantFailover(n) ((n)->flags & CLUSTER_NODE_NOFAILOVER) + +/* This structure represent elements of node->fail_reports. */ +typedef struct clusterNodeFailReport { + clusterNode *node; /* Node reporting the failure condition. */ + mstime_t time; /* Time of the last report from this node. */ +} clusterNodeFailReport; + +/* Redis cluster messages header */ + +/* Message types. + * + * Note that the PING, PONG and MEET messages are actually the same exact + * kind of packet. PONG is the reply to ping, in the exact format as a PING, + * while MEET is a special PING that forces the receiver to add the sender + * as a node (if it is not already in the list). 
*/ +#define CLUSTERMSG_TYPE_PING 0 /* Ping */ +#define CLUSTERMSG_TYPE_PONG 1 /* Pong (reply to Ping) */ +#define CLUSTERMSG_TYPE_MEET 2 /* Meet "let's join" message */ +#define CLUSTERMSG_TYPE_FAIL 3 /* Mark node xxx as failing */ +#define CLUSTERMSG_TYPE_PUBLISH 4 /* Pub/Sub Publish propagation */ +#define CLUSTERMSG_TYPE_FAILOVER_AUTH_REQUEST 5 /* May I failover? */ +#define CLUSTERMSG_TYPE_FAILOVER_AUTH_ACK 6 /* Yes, you have my vote */ +#define CLUSTERMSG_TYPE_UPDATE 7 /* Another node slots configuration */ +#define CLUSTERMSG_TYPE_MFSTART 8 /* Pause clients for manual failover */ +#define CLUSTERMSG_TYPE_MODULE 9 /* Module cluster API message. */ +#define CLUSTERMSG_TYPE_PUBLISHSHARD 10 /* Pub/Sub Publish shard propagation */ +#define CLUSTERMSG_TYPE_COUNT 11 /* Total number of message types. */ + +/* Initially we don't know our "name", but we'll find it once we connect + * to the first node, using the getsockname() function. Then we'll use this + * address for all the next messages. */ +typedef struct { + char nodename[CLUSTER_NAMELEN]; + uint32_t ping_sent; + uint32_t pong_received; + char ip[NET_IP_STR_LEN]; /* IP address last time it was seen */ + uint16_t port; /* primary port last time it was seen */ + uint16_t cport; /* cluster port last time it was seen */ + uint16_t flags; /* node->flags copy */ + uint16_t pport; /* secondary port last time it was seen */ + uint16_t notused1; +} clusterMsgDataGossip; + +typedef struct { + char nodename[CLUSTER_NAMELEN]; +} clusterMsgDataFail; + +typedef struct { + uint32_t channel_len; + uint32_t message_len; + unsigned char bulk_data[8]; /* 8 bytes just as placeholder. */ +} clusterMsgDataPublish; + +typedef struct { + uint64_t configEpoch; /* Config epoch of the specified instance. */ + char nodename[CLUSTER_NAMELEN]; /* Name of the slots owner. */ + unsigned char slots[CLUSTER_SLOTS/8]; /* Slots bitmap. */ +} clusterMsgDataUpdate; + +typedef struct { + uint64_t module_id; /* ID of the sender module. 
*/ + uint32_t len; /* ID of the sender module. */ + uint8_t type; /* Type from 0 to 255. */ + unsigned char bulk_data[3]; /* 3 bytes just as placeholder. */ +} clusterMsgModule; + +/* The cluster supports optional extension messages that can be sent + * along with ping/pong/meet messages to give additional info in a + * consistent manner. */ +typedef enum { + CLUSTERMSG_EXT_TYPE_HOSTNAME, + CLUSTERMSG_EXT_TYPE_HUMAN_NODENAME, + CLUSTERMSG_EXT_TYPE_FORGOTTEN_NODE, + CLUSTERMSG_EXT_TYPE_SHARDID, +} clusterMsgPingtypes; + +/* Helper function for making sure extensions are eight byte aligned. */ +#define EIGHT_BYTE_ALIGN(size) ((((size) + 7) / 8) * 8) + +typedef struct { + char hostname[1]; /* The announced hostname, ends with \0. */ +} clusterMsgPingExtHostname; + +typedef struct { + char human_nodename[1]; /* The announced nodename, ends with \0. */ +} clusterMsgPingExtHumanNodename; + +typedef struct { + char name[CLUSTER_NAMELEN]; /* Node name. */ + uint64_t ttl; /* Remaining time to blacklist the node, in seconds. */ +} clusterMsgPingExtForgottenNode; + +static_assert(sizeof(clusterMsgPingExtForgottenNode) % 8 == 0, ""); + +typedef struct { + char shard_id[CLUSTER_NAMELEN]; /* The shard_id, 40 bytes fixed. */ +} clusterMsgPingExtShardId; + +typedef struct { + uint32_t length; /* Total length of this extension message (including this header) */ + uint16_t type; /* Type of this extension message (see clusterMsgPingExtTypes) */ + uint16_t unused; /* 16 bits of padding to make this structure 8 byte aligned. */ + union { + clusterMsgPingExtHostname hostname; + clusterMsgPingExtHumanNodename human_nodename; + clusterMsgPingExtForgottenNode forgotten_node; + clusterMsgPingExtShardId shard_id; + } ext[]; /* Actual extension information, formatted so that the data is 8 + * byte aligned, regardless of its content. 
*/ +} clusterMsgPingExt; + +union clusterMsgData { + /* PING, MEET and PONG */ + struct { + /* Array of N clusterMsgDataGossip structures */ + clusterMsgDataGossip gossip[1]; + /* Extension data that can optionally be sent for ping/meet/pong + * messages. We can't explicitly define them here though, since + * the gossip array isn't the real length of the gossip data. */ + } ping; + + /* FAIL */ + struct { + clusterMsgDataFail about; + } fail; + + /* PUBLISH */ + struct { + clusterMsgDataPublish msg; + } publish; + + /* UPDATE */ + struct { + clusterMsgDataUpdate nodecfg; + } update; + + /* MODULE */ + struct { + clusterMsgModule msg; + } module; +}; + +#define CLUSTER_PROTO_VER 1 /* Cluster bus protocol version. */ + +typedef struct { + char sig[4]; /* Signature "RCmb" (Redis Cluster message bus). */ + uint32_t totlen; /* Total length of this message */ + uint16_t ver; /* Protocol version, currently set to 1. */ + uint16_t port; /* Primary port number (TCP or TLS). */ + uint16_t type; /* Message type */ + uint16_t count; /* Only used for some kind of messages. */ + uint64_t currentEpoch; /* The epoch accordingly to the sending node. */ + uint64_t configEpoch; /* The config epoch if it's a master, or the last + epoch advertised by its master if it is a + slave. */ + uint64_t offset; /* Master replication offset if node is a master or + processed replication offset if node is a slave. */ + char sender[CLUSTER_NAMELEN]; /* Name of the sender node */ + unsigned char myslots[CLUSTER_SLOTS/8]; + char slaveof[CLUSTER_NAMELEN]; + char myip[NET_IP_STR_LEN]; /* Sender IP, if not all zeroed. */ + uint16_t extensions; /* Number of extensions sent along with this packet. */ + char notused1[30]; /* 30 bytes reserved for future usage. 
*/ + uint16_t pport; /* Secondary port number: if primary port is TCP port, this is + TLS port, and if primary port is TLS port, this is TCP port.*/ + uint16_t cport; /* Sender TCP cluster bus port */ + uint16_t flags; /* Sender node flags */ + unsigned char state; /* Cluster state from the POV of the sender */ + unsigned char mflags[3]; /* Message flags: CLUSTERMSG_FLAG[012]_... */ + union clusterMsgData data; +} clusterMsg; + +/* clusterMsg defines the gossip wire protocol exchanged among Redis cluster + * members, which can be running different versions of redis-server bits, + * especially during cluster rolling upgrades. + * + * Therefore, fields in this struct should remain at the same offset from + * release to release. The static asserts below ensures that incompatible + * changes in clusterMsg be caught at compile time. + */ + +static_assert(offsetof(clusterMsg, sig) == 0, "unexpected field offset"); +static_assert(offsetof(clusterMsg, totlen) == 4, "unexpected field offset"); +static_assert(offsetof(clusterMsg, ver) == 8, "unexpected field offset"); +static_assert(offsetof(clusterMsg, port) == 10, "unexpected field offset"); +static_assert(offsetof(clusterMsg, type) == 12, "unexpected field offset"); +static_assert(offsetof(clusterMsg, count) == 14, "unexpected field offset"); +static_assert(offsetof(clusterMsg, currentEpoch) == 16, "unexpected field offset"); +static_assert(offsetof(clusterMsg, configEpoch) == 24, "unexpected field offset"); +static_assert(offsetof(clusterMsg, offset) == 32, "unexpected field offset"); +static_assert(offsetof(clusterMsg, sender) == 40, "unexpected field offset"); +static_assert(offsetof(clusterMsg, myslots) == 80, "unexpected field offset"); +static_assert(offsetof(clusterMsg, slaveof) == 2128, "unexpected field offset"); +static_assert(offsetof(clusterMsg, myip) == 2168, "unexpected field offset"); +static_assert(offsetof(clusterMsg, extensions) == 2214, "unexpected field offset"); +static_assert(offsetof(clusterMsg, 
notused1) == 2216, "unexpected field offset"); +static_assert(offsetof(clusterMsg, pport) == 2246, "unexpected field offset"); +static_assert(offsetof(clusterMsg, cport) == 2248, "unexpected field offset"); +static_assert(offsetof(clusterMsg, flags) == 2250, "unexpected field offset"); +static_assert(offsetof(clusterMsg, state) == 2252, "unexpected field offset"); +static_assert(offsetof(clusterMsg, mflags) == 2253, "unexpected field offset"); +static_assert(offsetof(clusterMsg, data) == 2256, "unexpected field offset"); + +#define CLUSTERMSG_MIN_LEN (sizeof(clusterMsg)-sizeof(union clusterMsgData)) + +/* Message flags better specify the packet content or are used to + * provide some information about the node state. */ +#define CLUSTERMSG_FLAG0_PAUSED (1<<0) /* Master paused for manual failover. */ +#define CLUSTERMSG_FLAG0_FORCEACK (1<<1) /* Give ACK to AUTH_REQUEST even if + master is up. */ +#define CLUSTERMSG_FLAG0_EXT_DATA (1<<2) /* Message contains extension data */ + +struct _clusterNode { + mstime_t ctime; /* Node object creation time. */ + char name[CLUSTER_NAMELEN]; /* Node name, hex string, sha1-size */ + char shard_id[CLUSTER_NAMELEN]; /* shard id, hex string, sha1-size */ + int flags; /* CLUSTER_NODE_... */ + uint64_t configEpoch; /* Last configEpoch observed for this node */ + unsigned char slots[CLUSTER_SLOTS/8]; /* slots handled by this node */ + uint16_t *slot_info_pairs; /* Slots info represented as (start/end) pair (consecutive index). */ + int slot_info_pairs_count; /* Used number of slots in slot_info_pairs */ + int numslots; /* Number of slots handled by this node */ + int numslaves; /* Number of slave nodes, if this is a master */ + clusterNode **slaves; /* pointers to slave nodes */ + clusterNode *slaveof; /* pointer to the master node. Note that it + may be NULL even if the node is a slave + if we don't have the master node in our + tables. 
*/ + unsigned long long last_in_ping_gossip; /* The number of the last carried in the ping gossip section */ + mstime_t ping_sent; /* Unix time we sent latest ping */ + mstime_t pong_received; /* Unix time we received the pong */ + mstime_t data_received; /* Unix time we received any data */ + mstime_t fail_time; /* Unix time when FAIL flag was set */ + mstime_t voted_time; /* Last time we voted for a slave of this master */ + mstime_t repl_offset_time; /* Unix time we received offset for this node */ + mstime_t orphaned_time; /* Starting time of orphaned master condition */ + long long repl_offset; /* Last known repl offset for this node. */ + char ip[NET_IP_STR_LEN]; /* Latest known IP address of this node */ + sds hostname; /* The known hostname for this node */ + sds human_nodename; /* The known human readable nodename for this node */ + int tcp_port; /* Latest known clients TCP port. */ + int tls_port; /* Latest known clients TLS port */ + int cport; /* Latest known cluster port of this node. */ + clusterLink *link; /* TCP/IP link established toward this node */ + clusterLink *inbound_link; /* TCP/IP link accepted from this node */ + list *fail_reports; /* List of nodes signaling this as failing */ +}; + +struct clusterState { + clusterNode *myself; /* This node */ + uint64_t currentEpoch; + int state; /* CLUSTER_OK, CLUSTER_FAIL, ... */ + int size; /* Num of master nodes with at least one slot */ + dict *nodes; /* Hash table of name -> clusterNode structures */ + dict *shards; /* Hash table of shard_id -> list (of nodes) structures */ + dict *nodes_black_list; /* Nodes we don't re-add for a few seconds. */ + clusterNode *migrating_slots_to[CLUSTER_SLOTS]; + clusterNode *importing_slots_from[CLUSTER_SLOTS]; + clusterNode *slots[CLUSTER_SLOTS]; + /* The following fields are used to take the slave state on elections. */ + mstime_t failover_auth_time; /* Time of previous or next election. */ + int failover_auth_count; /* Number of votes received so far. 
*/ + int failover_auth_sent; /* True if we already asked for votes. */ + int failover_auth_rank; /* This slave rank for current auth request. */ + uint64_t failover_auth_epoch; /* Epoch of the current election. */ + int cant_failover_reason; /* Why a slave is currently not able to + failover. See the CANT_FAILOVER_* macros. */ + /* Manual failover state in common. */ + mstime_t mf_end; /* Manual failover time limit (ms unixtime). + It is zero if there is no MF in progress. */ + /* Manual failover state of master. */ + clusterNode *mf_slave; /* Slave performing the manual failover. */ + /* Manual failover state of slave. */ + long long mf_master_offset; /* Master offset the slave needs to start MF + or -1 if still not received. */ + int mf_can_start; /* If non-zero signal that the manual failover + can start requesting masters vote. */ + /* The following fields are used by masters to take state on elections. */ + uint64_t lastVoteEpoch; /* Epoch of the last vote granted. */ + int todo_before_sleep; /* Things to do in clusterBeforeSleep(). */ + /* Stats */ + /* Messages received and sent by type. */ + long long stats_bus_messages_sent[CLUSTERMSG_TYPE_COUNT]; + long long stats_bus_messages_received[CLUSTERMSG_TYPE_COUNT]; + long long stats_pfail_nodes; /* Number of nodes in PFAIL status, + excluding nodes without address. */ + unsigned long long stat_cluster_links_buffer_limit_exceeded; /* Total number of cluster links freed due to exceeding buffer limit */ + + /* Bit map for slots that are no longer claimed by the owner in cluster PING + * messages. During slot migration, the owner will stop claiming the slot after + * the ownership transfer. Set the bit corresponding to the slot when a node + * stops claiming the slot. This prevents spreading incorrect information (that + * source still owns the slot) using UPDATE messages. 
*/ + unsigned char owner_not_claiming_slot[CLUSTER_SLOTS / 8]; +}; + + +#endif //CLUSTER_LEGACY_H diff --git a/src/commands.def b/src/commands.def index 7e575648c5e..ff8b81d4188 100644 --- a/src/commands.def +++ b/src/commands.def @@ -964,14 +964,14 @@ struct COMMAND_STRUCT CLUSTER_Subcommands[] = { {MAKE_CMD("myid","Returns the ID of a node.","O(1)","3.0.0",CMD_DOC_NONE,NULL,NULL,"cluster",COMMAND_GROUP_CLUSTER,CLUSTER_MYID_History,0,CLUSTER_MYID_Tips,0,clusterCommand,2,CMD_STALE,0,CLUSTER_MYID_Keyspecs,0,NULL,0)}, {MAKE_CMD("myshardid","Returns the shard ID of a node.","O(1)","7.2.0",CMD_DOC_NONE,NULL,NULL,"cluster",COMMAND_GROUP_CLUSTER,CLUSTER_MYSHARDID_History,0,CLUSTER_MYSHARDID_Tips,1,clusterCommand,2,CMD_STALE,0,CLUSTER_MYSHARDID_Keyspecs,0,NULL,0)}, {MAKE_CMD("nodes","Returns the cluster configuration for a node.","O(N) where N is the total number of Cluster nodes","3.0.0",CMD_DOC_NONE,NULL,NULL,"cluster",COMMAND_GROUP_CLUSTER,CLUSTER_NODES_History,0,CLUSTER_NODES_Tips,1,clusterCommand,2,CMD_STALE,0,CLUSTER_NODES_Keyspecs,0,NULL,0)}, -{MAKE_CMD("replicas","Lists the replica nodes of a master node.","O(1)","5.0.0",CMD_DOC_NONE,NULL,NULL,"cluster",COMMAND_GROUP_CLUSTER,CLUSTER_REPLICAS_History,0,CLUSTER_REPLICAS_Tips,1,clusterCommand,3,CMD_ADMIN|CMD_STALE,0,CLUSTER_REPLICAS_Keyspecs,0,NULL,1),.args=CLUSTER_REPLICAS_Args}, +{MAKE_CMD("replicas","Lists the replica nodes of a master node.","O(N) where N is the number of replicas.","5.0.0",CMD_DOC_NONE,NULL,NULL,"cluster",COMMAND_GROUP_CLUSTER,CLUSTER_REPLICAS_History,0,CLUSTER_REPLICAS_Tips,1,clusterCommand,3,CMD_ADMIN|CMD_STALE,0,CLUSTER_REPLICAS_Keyspecs,0,NULL,1),.args=CLUSTER_REPLICAS_Args}, {MAKE_CMD("replicate","Configure a node as replica of a master node.","O(1)","3.0.0",CMD_DOC_NONE,NULL,NULL,"cluster",COMMAND_GROUP_CLUSTER,CLUSTER_REPLICATE_History,0,CLUSTER_REPLICATE_Tips,0,clusterCommand,3,CMD_NO_ASYNC_LOADING|CMD_ADMIN|CMD_STALE,0,CLUSTER_REPLICATE_Keyspecs,0,NULL,1),.args=CLUSTER_REPLICATE_Args}, 
{MAKE_CMD("reset","Resets a node.","O(N) where N is the number of known nodes. The command may execute a FLUSHALL as a side effect.","3.0.0",CMD_DOC_NONE,NULL,NULL,"cluster",COMMAND_GROUP_CLUSTER,CLUSTER_RESET_History,0,CLUSTER_RESET_Tips,0,clusterCommand,-2,CMD_ADMIN|CMD_STALE|CMD_NOSCRIPT,0,CLUSTER_RESET_Keyspecs,0,NULL,1),.args=CLUSTER_RESET_Args}, {MAKE_CMD("saveconfig","Forces a node to save the cluster configuration to disk.","O(1)","3.0.0",CMD_DOC_NONE,NULL,NULL,"cluster",COMMAND_GROUP_CLUSTER,CLUSTER_SAVECONFIG_History,0,CLUSTER_SAVECONFIG_Tips,0,clusterCommand,2,CMD_NO_ASYNC_LOADING|CMD_ADMIN|CMD_STALE,0,CLUSTER_SAVECONFIG_Keyspecs,0,NULL,0)}, {MAKE_CMD("set-config-epoch","Sets the configuration epoch for a new node.","O(1)","3.0.0",CMD_DOC_NONE,NULL,NULL,"cluster",COMMAND_GROUP_CLUSTER,CLUSTER_SET_CONFIG_EPOCH_History,0,CLUSTER_SET_CONFIG_EPOCH_Tips,0,clusterCommand,3,CMD_NO_ASYNC_LOADING|CMD_ADMIN|CMD_STALE,0,CLUSTER_SET_CONFIG_EPOCH_Keyspecs,0,NULL,1),.args=CLUSTER_SET_CONFIG_EPOCH_Args}, {MAKE_CMD("setslot","Binds a hash slot to a node.","O(1)","3.0.0",CMD_DOC_NONE,NULL,NULL,"cluster",COMMAND_GROUP_CLUSTER,CLUSTER_SETSLOT_History,0,CLUSTER_SETSLOT_Tips,0,clusterCommand,-4,CMD_NO_ASYNC_LOADING|CMD_ADMIN|CMD_STALE,0,CLUSTER_SETSLOT_Keyspecs,0,NULL,2),.args=CLUSTER_SETSLOT_Args}, {MAKE_CMD("shards","Returns the mapping of cluster slots to shards.","O(N) where N is the total number of cluster nodes","7.0.0",CMD_DOC_NONE,NULL,NULL,"cluster",COMMAND_GROUP_CLUSTER,CLUSTER_SHARDS_History,0,CLUSTER_SHARDS_Tips,1,clusterCommand,2,CMD_LOADING|CMD_STALE,0,CLUSTER_SHARDS_Keyspecs,0,NULL,0)}, -{MAKE_CMD("slaves","Lists the replica nodes of a master node.","O(1)","3.0.0",CMD_DOC_DEPRECATED,"`CLUSTER REPLICAS`","5.0.0","cluster",COMMAND_GROUP_CLUSTER,CLUSTER_SLAVES_History,0,CLUSTER_SLAVES_Tips,1,clusterCommand,3,CMD_ADMIN|CMD_STALE,0,CLUSTER_SLAVES_Keyspecs,0,NULL,1),.args=CLUSTER_SLAVES_Args}, +{MAKE_CMD("slaves","Lists the replica nodes of a master node.","O(N) 
where N is the number of replicas.","3.0.0",CMD_DOC_DEPRECATED,"`CLUSTER REPLICAS`","5.0.0","cluster",COMMAND_GROUP_CLUSTER,CLUSTER_SLAVES_History,0,CLUSTER_SLAVES_Tips,1,clusterCommand,3,CMD_ADMIN|CMD_STALE,0,CLUSTER_SLAVES_Keyspecs,0,NULL,1),.args=CLUSTER_SLAVES_Args}, {MAKE_CMD("slots","Returns the mapping of cluster slots to nodes.","O(N) where N is the total number of Cluster nodes","3.0.0",CMD_DOC_DEPRECATED,"`CLUSTER SHARDS`","7.0.0","cluster",COMMAND_GROUP_CLUSTER,CLUSTER_SLOTS_History,2,CLUSTER_SLOTS_Tips,1,clusterCommand,2,CMD_LOADING|CMD_STALE,0,CLUSTER_SLOTS_Keyspecs,0,NULL,0)}, {0} }; @@ -1177,6 +1177,7 @@ commandHistory CLIENT_KILL_History[] = { {"3.2.0","Added `master` type in for `TYPE` option."}, {"5.0.0","Replaced `slave` `TYPE` with `replica`. `slave` still supported for backward compatibility."}, {"6.2.0","`LADDR` option."}, +{"7.4.0","`MAXAGE` option."}, }; #endif @@ -1213,12 +1214,13 @@ struct COMMAND_ARG CLIENT_KILL_filter_new_format_Subargs[] = { {MAKE_ARG("addr",ARG_TYPE_STRING,-1,"ADDR",NULL,NULL,CMD_ARG_OPTIONAL,0,NULL),.display_text="ip:port"}, {MAKE_ARG("laddr",ARG_TYPE_STRING,-1,"LADDR",NULL,"6.2.0",CMD_ARG_OPTIONAL,0,NULL),.display_text="ip:port"}, {MAKE_ARG("skipme",ARG_TYPE_ONEOF,-1,"SKIPME",NULL,NULL,CMD_ARG_OPTIONAL,2,NULL),.subargs=CLIENT_KILL_filter_new_format_skipme_Subargs}, +{MAKE_ARG("maxage",ARG_TYPE_INTEGER,-1,"MAXAGE",NULL,"7.4.0",CMD_ARG_OPTIONAL,0,NULL)}, }; /* CLIENT KILL filter argument table */ struct COMMAND_ARG CLIENT_KILL_filter_Subargs[] = { {MAKE_ARG("old-format",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,"2.8.12"),.display_text="ip:port"}, -{MAKE_ARG("new-format",ARG_TYPE_ONEOF,-1,NULL,NULL,NULL,CMD_ARG_MULTIPLE,6,NULL),.subargs=CLIENT_KILL_filter_new_format_Subargs}, +{MAKE_ARG("new-format",ARG_TYPE_ONEOF,-1,NULL,NULL,NULL,CMD_ARG_MULTIPLE,7,NULL),.subargs=CLIENT_KILL_filter_new_format_Subargs}, }; /* CLIENT KILL argument table */ @@ -1391,7 +1393,10 @@ struct COMMAND_ARG CLIENT_REPLY_Args[] = { #ifndef 
SKIP_CMD_TIPS_TABLE /* CLIENT SETINFO tips */ -#define CLIENT_SETINFO_Tips NULL +const char *CLIENT_SETINFO_Tips[] = { +"request_policy:all_nodes", +"response_policy:all_succeeded", +}; #endif #ifndef SKIP_CMD_KEY_SPECS_TABLE @@ -1419,7 +1424,10 @@ struct COMMAND_ARG CLIENT_SETINFO_Args[] = { #ifndef SKIP_CMD_TIPS_TABLE /* CLIENT SETNAME tips */ -#define CLIENT_SETNAME_Tips NULL +const char *CLIENT_SETNAME_Tips[] = { +"request_policy:all_nodes", +"response_policy:all_succeeded", +}; #endif #ifndef SKIP_CMD_KEY_SPECS_TABLE @@ -1537,14 +1545,14 @@ struct COMMAND_STRUCT CLIENT_Subcommands[] = { {MAKE_CMD("help","Returns helpful text about the different subcommands.","O(1)","5.0.0",CMD_DOC_NONE,NULL,NULL,"connection",COMMAND_GROUP_CONNECTION,CLIENT_HELP_History,0,CLIENT_HELP_Tips,0,clientCommand,2,CMD_LOADING|CMD_STALE|CMD_SENTINEL,ACL_CATEGORY_CONNECTION,CLIENT_HELP_Keyspecs,0,NULL,0)}, {MAKE_CMD("id","Returns the unique client ID of the connection.","O(1)","5.0.0",CMD_DOC_NONE,NULL,NULL,"connection",COMMAND_GROUP_CONNECTION,CLIENT_ID_History,0,CLIENT_ID_Tips,0,clientCommand,2,CMD_NOSCRIPT|CMD_LOADING|CMD_STALE|CMD_SENTINEL,ACL_CATEGORY_CONNECTION,CLIENT_ID_Keyspecs,0,NULL,0)}, {MAKE_CMD("info","Returns information about the connection.","O(1)","6.2.0",CMD_DOC_NONE,NULL,NULL,"connection",COMMAND_GROUP_CONNECTION,CLIENT_INFO_History,0,CLIENT_INFO_Tips,1,clientCommand,2,CMD_NOSCRIPT|CMD_LOADING|CMD_STALE|CMD_SENTINEL,ACL_CATEGORY_CONNECTION,CLIENT_INFO_Keyspecs,0,NULL,0)}, -{MAKE_CMD("kill","Terminates open connections.","O(N) where N is the number of client connections","2.4.0",CMD_DOC_NONE,NULL,NULL,"connection",COMMAND_GROUP_CONNECTION,CLIENT_KILL_History,5,CLIENT_KILL_Tips,0,clientCommand,-3,CMD_ADMIN|CMD_NOSCRIPT|CMD_LOADING|CMD_STALE|CMD_SENTINEL,ACL_CATEGORY_CONNECTION,CLIENT_KILL_Keyspecs,0,NULL,1),.args=CLIENT_KILL_Args}, +{MAKE_CMD("kill","Terminates open connections.","O(N) where N is the number of client 
connections","2.4.0",CMD_DOC_NONE,NULL,NULL,"connection",COMMAND_GROUP_CONNECTION,CLIENT_KILL_History,6,CLIENT_KILL_Tips,0,clientCommand,-3,CMD_ADMIN|CMD_NOSCRIPT|CMD_LOADING|CMD_STALE|CMD_SENTINEL,ACL_CATEGORY_CONNECTION,CLIENT_KILL_Keyspecs,0,NULL,1),.args=CLIENT_KILL_Args}, {MAKE_CMD("list","Lists open connections.","O(N) where N is the number of client connections","2.4.0",CMD_DOC_NONE,NULL,NULL,"connection",COMMAND_GROUP_CONNECTION,CLIENT_LIST_History,6,CLIENT_LIST_Tips,1,clientCommand,-2,CMD_ADMIN|CMD_NOSCRIPT|CMD_LOADING|CMD_STALE|CMD_SENTINEL,ACL_CATEGORY_CONNECTION,CLIENT_LIST_Keyspecs,0,NULL,2),.args=CLIENT_LIST_Args}, {MAKE_CMD("no-evict","Sets the client eviction mode of the connection.","O(1)","7.0.0",CMD_DOC_NONE,NULL,NULL,"connection",COMMAND_GROUP_CONNECTION,CLIENT_NO_EVICT_History,0,CLIENT_NO_EVICT_Tips,0,clientCommand,3,CMD_ADMIN|CMD_NOSCRIPT|CMD_LOADING|CMD_STALE|CMD_SENTINEL,ACL_CATEGORY_CONNECTION,CLIENT_NO_EVICT_Keyspecs,0,NULL,1),.args=CLIENT_NO_EVICT_Args}, {MAKE_CMD("no-touch","Controls whether commands sent by the client affect the LRU/LFU of accessed keys.","O(1)","7.2.0",CMD_DOC_NONE,NULL,NULL,"connection",COMMAND_GROUP_CONNECTION,CLIENT_NO_TOUCH_History,0,CLIENT_NO_TOUCH_Tips,0,clientCommand,3,CMD_NOSCRIPT|CMD_LOADING|CMD_STALE,ACL_CATEGORY_CONNECTION,CLIENT_NO_TOUCH_Keyspecs,0,NULL,1),.args=CLIENT_NO_TOUCH_Args}, {MAKE_CMD("pause","Suspends commands processing.","O(1)","3.0.0",CMD_DOC_NONE,NULL,NULL,"connection",COMMAND_GROUP_CONNECTION,CLIENT_PAUSE_History,1,CLIENT_PAUSE_Tips,0,clientCommand,-3,CMD_ADMIN|CMD_NOSCRIPT|CMD_LOADING|CMD_STALE|CMD_SENTINEL,ACL_CATEGORY_CONNECTION,CLIENT_PAUSE_Keyspecs,0,NULL,2),.args=CLIENT_PAUSE_Args}, {MAKE_CMD("reply","Instructs the server whether to reply to 
commands.","O(1)","3.2.0",CMD_DOC_NONE,NULL,NULL,"connection",COMMAND_GROUP_CONNECTION,CLIENT_REPLY_History,0,CLIENT_REPLY_Tips,0,clientCommand,3,CMD_NOSCRIPT|CMD_LOADING|CMD_STALE|CMD_SENTINEL,ACL_CATEGORY_CONNECTION,CLIENT_REPLY_Keyspecs,0,NULL,1),.args=CLIENT_REPLY_Args}, -{MAKE_CMD("setinfo","Sets information specific to the client or connection.","O(1)","7.2.0",CMD_DOC_NONE,NULL,NULL,"connection",COMMAND_GROUP_CONNECTION,CLIENT_SETINFO_History,0,CLIENT_SETINFO_Tips,0,clientSetinfoCommand,4,CMD_NOSCRIPT|CMD_LOADING|CMD_STALE|CMD_SENTINEL,ACL_CATEGORY_CONNECTION,CLIENT_SETINFO_Keyspecs,0,NULL,1),.args=CLIENT_SETINFO_Args}, -{MAKE_CMD("setname","Sets the connection name.","O(1)","2.6.9",CMD_DOC_NONE,NULL,NULL,"connection",COMMAND_GROUP_CONNECTION,CLIENT_SETNAME_History,0,CLIENT_SETNAME_Tips,0,clientCommand,3,CMD_NOSCRIPT|CMD_LOADING|CMD_STALE|CMD_SENTINEL,ACL_CATEGORY_CONNECTION,CLIENT_SETNAME_Keyspecs,0,NULL,1),.args=CLIENT_SETNAME_Args}, +{MAKE_CMD("setinfo","Sets information specific to the client or connection.","O(1)","7.2.0",CMD_DOC_NONE,NULL,NULL,"connection",COMMAND_GROUP_CONNECTION,CLIENT_SETINFO_History,0,CLIENT_SETINFO_Tips,2,clientSetinfoCommand,4,CMD_NOSCRIPT|CMD_LOADING|CMD_STALE|CMD_SENTINEL,ACL_CATEGORY_CONNECTION,CLIENT_SETINFO_Keyspecs,0,NULL,1),.args=CLIENT_SETINFO_Args}, +{MAKE_CMD("setname","Sets the connection name.","O(1)","2.6.9",CMD_DOC_NONE,NULL,NULL,"connection",COMMAND_GROUP_CONNECTION,CLIENT_SETNAME_History,0,CLIENT_SETNAME_Tips,2,clientCommand,3,CMD_NOSCRIPT|CMD_LOADING|CMD_STALE|CMD_SENTINEL,ACL_CATEGORY_CONNECTION,CLIENT_SETNAME_Keyspecs,0,NULL,1),.args=CLIENT_SETNAME_Args}, {MAKE_CMD("tracking","Controls server-assisted client-side caching for the connection.","O(1). 
Some options may introduce additional complexity.","6.0.0",CMD_DOC_NONE,NULL,NULL,"connection",COMMAND_GROUP_CONNECTION,CLIENT_TRACKING_History,0,CLIENT_TRACKING_Tips,0,clientCommand,-3,CMD_NOSCRIPT|CMD_LOADING|CMD_STALE|CMD_SENTINEL,ACL_CATEGORY_CONNECTION,CLIENT_TRACKING_Keyspecs,0,NULL,7),.args=CLIENT_TRACKING_Args}, {MAKE_CMD("trackinginfo","Returns information about server-assisted client-side caching for the connection.","O(1)","6.2.0",CMD_DOC_NONE,NULL,NULL,"connection",COMMAND_GROUP_CONNECTION,CLIENT_TRACKINGINFO_History,0,CLIENT_TRACKINGINFO_Tips,0,clientCommand,2,CMD_NOSCRIPT|CMD_LOADING|CMD_STALE|CMD_SENTINEL,ACL_CATEGORY_CONNECTION,CLIENT_TRACKINGINFO_Keyspecs,0,NULL,0)}, {MAKE_CMD("unblock","Unblocks a client blocked by a blocking command from a different connection.","O(log N) where N is the number of client connections","5.0.0",CMD_DOC_NONE,NULL,NULL,"connection",COMMAND_GROUP_CONNECTION,CLIENT_UNBLOCK_History,0,CLIENT_UNBLOCK_Tips,0,clientCommand,-3,CMD_ADMIN|CMD_NOSCRIPT|CMD_LOADING|CMD_STALE|CMD_SENTINEL,ACL_CATEGORY_CONNECTION,CLIENT_UNBLOCK_Keyspecs,0,NULL,2),.args=CLIENT_UNBLOCK_Args}, @@ -2328,6 +2336,7 @@ struct COMMAND_ARG PTTL_Args[] = { /* RANDOMKEY tips */ const char *RANDOMKEY_Tips[] = { "request_policy:all_shards", +"response_policy:special", "nondeterministic_output", }; #endif @@ -2437,6 +2446,7 @@ commandHistory SCAN_History[] = { const char *SCAN_Tips[] = { "nondeterministic_output", "request_policy:special", +"response_policy:special", }; #endif @@ -2890,6 +2900,7 @@ struct COMMAND_ARG GEORADIUS_Args[] = { #ifndef SKIP_CMD_HISTORY_TABLE /* GEORADIUSBYMEMBER history */ commandHistory GEORADIUSBYMEMBER_History[] = { +{"6.2.0","Added the `ANY` option for `COUNT`."}, {"7.0.0","Added support for uppercase unit names."}, }; #endif @@ -2950,7 +2961,10 @@ struct COMMAND_ARG GEORADIUSBYMEMBER_Args[] = { #ifndef SKIP_CMD_HISTORY_TABLE /* GEORADIUSBYMEMBER_RO history */ -#define GEORADIUSBYMEMBER_RO_History NULL +commandHistory 
GEORADIUSBYMEMBER_RO_History[] = { +{"6.2.0","Added the `ANY` option for `COUNT`."}, +{"7.0.0","Added support for uppercase unit names."}, +}; #endif #ifndef SKIP_CMD_TIPS_TABLE @@ -3004,6 +3018,7 @@ struct COMMAND_ARG GEORADIUSBYMEMBER_RO_Args[] = { /* GEORADIUS_RO history */ commandHistory GEORADIUS_RO_History[] = { {"6.2.0","Added the `ANY` option for `COUNT`."}, +{"7.0.0","Added support for uppercase unit names."}, }; #endif @@ -3288,6 +3303,119 @@ struct COMMAND_ARG HEXISTS_Args[] = { {MAKE_ARG("field",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, }; +/********** HEXPIRE ********************/ + +#ifndef SKIP_CMD_HISTORY_TABLE +/* HEXPIRE history */ +#define HEXPIRE_History NULL +#endif + +#ifndef SKIP_CMD_TIPS_TABLE +/* HEXPIRE tips */ +#define HEXPIRE_Tips NULL +#endif + +#ifndef SKIP_CMD_KEY_SPECS_TABLE +/* HEXPIRE key specs */ +keySpec HEXPIRE_Keyspecs[1] = { +{NULL,CMD_KEY_RW|CMD_KEY_UPDATE,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}} +}; +#endif + +/* HEXPIRE condition argument table */ +struct COMMAND_ARG HEXPIRE_condition_Subargs[] = { +{MAKE_ARG("nx",ARG_TYPE_PURE_TOKEN,-1,"NX",NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("xx",ARG_TYPE_PURE_TOKEN,-1,"XX",NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("gt",ARG_TYPE_PURE_TOKEN,-1,"GT",NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("lt",ARG_TYPE_PURE_TOKEN,-1,"LT",NULL,NULL,CMD_ARG_NONE,0,NULL)}, +}; + +/* HEXPIRE fields argument table */ +struct COMMAND_ARG HEXPIRE_fields_Subargs[] = { +{MAKE_ARG("numfields",ARG_TYPE_INTEGER,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("field",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_MULTIPLE,0,NULL)}, +}; + +/* HEXPIRE argument table */ +struct COMMAND_ARG HEXPIRE_Args[] = { +{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("seconds",ARG_TYPE_INTEGER,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("condition",ARG_TYPE_ONEOF,-1,NULL,NULL,NULL,CMD_ARG_OPTIONAL,4,NULL),.subargs=HEXPIRE_condition_Subargs}, 
+{MAKE_ARG("fields",ARG_TYPE_BLOCK,-1,"FIELDS",NULL,NULL,CMD_ARG_NONE,2,NULL),.subargs=HEXPIRE_fields_Subargs}, +}; + +/********** HEXPIREAT ********************/ + +#ifndef SKIP_CMD_HISTORY_TABLE +/* HEXPIREAT history */ +#define HEXPIREAT_History NULL +#endif + +#ifndef SKIP_CMD_TIPS_TABLE +/* HEXPIREAT tips */ +#define HEXPIREAT_Tips NULL +#endif + +#ifndef SKIP_CMD_KEY_SPECS_TABLE +/* HEXPIREAT key specs */ +keySpec HEXPIREAT_Keyspecs[1] = { +{NULL,CMD_KEY_RW|CMD_KEY_UPDATE,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}} +}; +#endif + +/* HEXPIREAT condition argument table */ +struct COMMAND_ARG HEXPIREAT_condition_Subargs[] = { +{MAKE_ARG("nx",ARG_TYPE_PURE_TOKEN,-1,"NX",NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("xx",ARG_TYPE_PURE_TOKEN,-1,"XX",NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("gt",ARG_TYPE_PURE_TOKEN,-1,"GT",NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("lt",ARG_TYPE_PURE_TOKEN,-1,"LT",NULL,NULL,CMD_ARG_NONE,0,NULL)}, +}; + +/* HEXPIREAT fields argument table */ +struct COMMAND_ARG HEXPIREAT_fields_Subargs[] = { +{MAKE_ARG("numfields",ARG_TYPE_INTEGER,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("field",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_MULTIPLE,0,NULL)}, +}; + +/* HEXPIREAT argument table */ +struct COMMAND_ARG HEXPIREAT_Args[] = { +{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("unix-time-seconds",ARG_TYPE_UNIX_TIME,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("condition",ARG_TYPE_ONEOF,-1,NULL,NULL,NULL,CMD_ARG_OPTIONAL,4,NULL),.subargs=HEXPIREAT_condition_Subargs}, +{MAKE_ARG("fields",ARG_TYPE_BLOCK,-1,"FIELDS",NULL,NULL,CMD_ARG_NONE,2,NULL),.subargs=HEXPIREAT_fields_Subargs}, +}; + +/********** HEXPIRETIME ********************/ + +#ifndef SKIP_CMD_HISTORY_TABLE +/* HEXPIRETIME history */ +#define HEXPIRETIME_History NULL +#endif + +#ifndef SKIP_CMD_TIPS_TABLE +/* HEXPIRETIME tips */ +#define HEXPIRETIME_Tips NULL +#endif + +#ifndef SKIP_CMD_KEY_SPECS_TABLE +/* HEXPIRETIME 
key specs */ +keySpec HEXPIRETIME_Keyspecs[1] = { +{NULL,CMD_KEY_RO|CMD_KEY_ACCESS,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}} +}; +#endif + +/* HEXPIRETIME fields argument table */ +struct COMMAND_ARG HEXPIRETIME_fields_Subargs[] = { +{MAKE_ARG("numfields",ARG_TYPE_INTEGER,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("field",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_MULTIPLE,0,NULL)}, +}; + +/* HEXPIRETIME argument table */ +struct COMMAND_ARG HEXPIRETIME_Args[] = { +{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("fields",ARG_TYPE_BLOCK,-1,"FIELDS",NULL,NULL,CMD_ARG_NONE,2,NULL),.subargs=HEXPIRETIME_fields_Subargs}, +}; + /********** HGET ********************/ #ifndef SKIP_CMD_HISTORY_TABLE @@ -3497,6 +3625,181 @@ struct COMMAND_ARG HMSET_Args[] = { {MAKE_ARG("data",ARG_TYPE_BLOCK,-1,NULL,NULL,NULL,CMD_ARG_MULTIPLE,2,NULL),.subargs=HMSET_data_Subargs}, }; +/********** HPERSIST ********************/ + +#ifndef SKIP_CMD_HISTORY_TABLE +/* HPERSIST history */ +#define HPERSIST_History NULL +#endif + +#ifndef SKIP_CMD_TIPS_TABLE +/* HPERSIST tips */ +#define HPERSIST_Tips NULL +#endif + +#ifndef SKIP_CMD_KEY_SPECS_TABLE +/* HPERSIST key specs */ +keySpec HPERSIST_Keyspecs[1] = { +{NULL,CMD_KEY_RW|CMD_KEY_UPDATE,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}} +}; +#endif + +/* HPERSIST fields argument table */ +struct COMMAND_ARG HPERSIST_fields_Subargs[] = { +{MAKE_ARG("numfields",ARG_TYPE_INTEGER,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("field",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_MULTIPLE,0,NULL)}, +}; + +/* HPERSIST argument table */ +struct COMMAND_ARG HPERSIST_Args[] = { +{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("fields",ARG_TYPE_BLOCK,-1,"FIELDS",NULL,NULL,CMD_ARG_NONE,2,NULL),.subargs=HPERSIST_fields_Subargs}, +}; + +/********** HPEXPIRE ********************/ + +#ifndef SKIP_CMD_HISTORY_TABLE +/* HPEXPIRE history */ +#define 
HPEXPIRE_History NULL +#endif + +#ifndef SKIP_CMD_TIPS_TABLE +/* HPEXPIRE tips */ +#define HPEXPIRE_Tips NULL +#endif + +#ifndef SKIP_CMD_KEY_SPECS_TABLE +/* HPEXPIRE key specs */ +keySpec HPEXPIRE_Keyspecs[1] = { +{NULL,CMD_KEY_RW|CMD_KEY_UPDATE,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}} +}; +#endif + +/* HPEXPIRE condition argument table */ +struct COMMAND_ARG HPEXPIRE_condition_Subargs[] = { +{MAKE_ARG("nx",ARG_TYPE_PURE_TOKEN,-1,"NX",NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("xx",ARG_TYPE_PURE_TOKEN,-1,"XX",NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("gt",ARG_TYPE_PURE_TOKEN,-1,"GT",NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("lt",ARG_TYPE_PURE_TOKEN,-1,"LT",NULL,NULL,CMD_ARG_NONE,0,NULL)}, +}; + +/* HPEXPIRE fields argument table */ +struct COMMAND_ARG HPEXPIRE_fields_Subargs[] = { +{MAKE_ARG("numfields",ARG_TYPE_INTEGER,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("field",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_MULTIPLE,0,NULL)}, +}; + +/* HPEXPIRE argument table */ +struct COMMAND_ARG HPEXPIRE_Args[] = { +{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("milliseconds",ARG_TYPE_INTEGER,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("condition",ARG_TYPE_ONEOF,-1,NULL,NULL,NULL,CMD_ARG_OPTIONAL,4,NULL),.subargs=HPEXPIRE_condition_Subargs}, +{MAKE_ARG("fields",ARG_TYPE_BLOCK,-1,"FIELDS",NULL,NULL,CMD_ARG_NONE,2,NULL),.subargs=HPEXPIRE_fields_Subargs}, +}; + +/********** HPEXPIREAT ********************/ + +#ifndef SKIP_CMD_HISTORY_TABLE +/* HPEXPIREAT history */ +#define HPEXPIREAT_History NULL +#endif + +#ifndef SKIP_CMD_TIPS_TABLE +/* HPEXPIREAT tips */ +#define HPEXPIREAT_Tips NULL +#endif + +#ifndef SKIP_CMD_KEY_SPECS_TABLE +/* HPEXPIREAT key specs */ +keySpec HPEXPIREAT_Keyspecs[1] = { +{NULL,CMD_KEY_RW|CMD_KEY_UPDATE,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}} +}; +#endif + +/* HPEXPIREAT condition argument table */ +struct COMMAND_ARG HPEXPIREAT_condition_Subargs[] = { 
+{MAKE_ARG("nx",ARG_TYPE_PURE_TOKEN,-1,"NX",NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("xx",ARG_TYPE_PURE_TOKEN,-1,"XX",NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("gt",ARG_TYPE_PURE_TOKEN,-1,"GT",NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("lt",ARG_TYPE_PURE_TOKEN,-1,"LT",NULL,NULL,CMD_ARG_NONE,0,NULL)}, +}; + +/* HPEXPIREAT fields argument table */ +struct COMMAND_ARG HPEXPIREAT_fields_Subargs[] = { +{MAKE_ARG("numfields",ARG_TYPE_INTEGER,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("field",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_MULTIPLE,0,NULL)}, +}; + +/* HPEXPIREAT argument table */ +struct COMMAND_ARG HPEXPIREAT_Args[] = { +{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("unix-time-milliseconds",ARG_TYPE_UNIX_TIME,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("condition",ARG_TYPE_ONEOF,-1,NULL,NULL,NULL,CMD_ARG_OPTIONAL,4,NULL),.subargs=HPEXPIREAT_condition_Subargs}, +{MAKE_ARG("fields",ARG_TYPE_BLOCK,-1,"FIELDS",NULL,NULL,CMD_ARG_NONE,2,NULL),.subargs=HPEXPIREAT_fields_Subargs}, +}; + +/********** HPEXPIRETIME ********************/ + +#ifndef SKIP_CMD_HISTORY_TABLE +/* HPEXPIRETIME history */ +#define HPEXPIRETIME_History NULL +#endif + +#ifndef SKIP_CMD_TIPS_TABLE +/* HPEXPIRETIME tips */ +#define HPEXPIRETIME_Tips NULL +#endif + +#ifndef SKIP_CMD_KEY_SPECS_TABLE +/* HPEXPIRETIME key specs */ +keySpec HPEXPIRETIME_Keyspecs[1] = { +{NULL,CMD_KEY_RO|CMD_KEY_ACCESS,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}} +}; +#endif + +/* HPEXPIRETIME fields argument table */ +struct COMMAND_ARG HPEXPIRETIME_fields_Subargs[] = { +{MAKE_ARG("numfields",ARG_TYPE_INTEGER,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("field",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_MULTIPLE,0,NULL)}, +}; + +/* HPEXPIRETIME argument table */ +struct COMMAND_ARG HPEXPIRETIME_Args[] = { +{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, 
+{MAKE_ARG("fields",ARG_TYPE_BLOCK,-1,"FIELDS",NULL,NULL,CMD_ARG_NONE,2,NULL),.subargs=HPEXPIRETIME_fields_Subargs}, +}; + +/********** HPTTL ********************/ + +#ifndef SKIP_CMD_HISTORY_TABLE +/* HPTTL history */ +#define HPTTL_History NULL +#endif + +#ifndef SKIP_CMD_TIPS_TABLE +/* HPTTL tips */ +#define HPTTL_Tips NULL +#endif + +#ifndef SKIP_CMD_KEY_SPECS_TABLE +/* HPTTL key specs */ +keySpec HPTTL_Keyspecs[1] = { +{NULL,CMD_KEY_RO|CMD_KEY_ACCESS,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}} +}; +#endif + +/* HPTTL fields argument table */ +struct COMMAND_ARG HPTTL_fields_Subargs[] = { +{MAKE_ARG("numfields",ARG_TYPE_INTEGER,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("field",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_MULTIPLE,0,NULL)}, +}; + +/* HPTTL argument table */ +struct COMMAND_ARG HPTTL_Args[] = { +{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("fields",ARG_TYPE_BLOCK,-1,"FIELDS",NULL,NULL,CMD_ARG_NONE,2,NULL),.subargs=HPTTL_fields_Subargs}, +}; + /********** HRANDFIELD ********************/ #ifndef SKIP_CMD_HISTORY_TABLE @@ -3557,6 +3860,7 @@ struct COMMAND_ARG HSCAN_Args[] = { {MAKE_ARG("cursor",ARG_TYPE_INTEGER,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, {MAKE_ARG("pattern",ARG_TYPE_PATTERN,-1,"MATCH",NULL,NULL,CMD_ARG_OPTIONAL,0,NULL)}, {MAKE_ARG("count",ARG_TYPE_INTEGER,-1,"COUNT",NULL,NULL,CMD_ARG_OPTIONAL,0,NULL)}, +{MAKE_ARG("novalues",ARG_TYPE_PURE_TOKEN,-1,"NOVALUES",NULL,NULL,CMD_ARG_OPTIONAL,0,NULL)}, }; /********** HSET ********************/ @@ -3643,6 +3947,37 @@ struct COMMAND_ARG HSTRLEN_Args[] = { {MAKE_ARG("field",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, }; +/********** HTTL ********************/ + +#ifndef SKIP_CMD_HISTORY_TABLE +/* HTTL history */ +#define HTTL_History NULL +#endif + +#ifndef SKIP_CMD_TIPS_TABLE +/* HTTL tips */ +#define HTTL_Tips NULL +#endif + +#ifndef SKIP_CMD_KEY_SPECS_TABLE +/* HTTL key specs */ +keySpec HTTL_Keyspecs[1] = { 
+{NULL,CMD_KEY_RO|CMD_KEY_ACCESS,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}} +}; +#endif + +/* HTTL fields argument table */ +struct COMMAND_ARG HTTL_fields_Subargs[] = { +{MAKE_ARG("numfields",ARG_TYPE_INTEGER,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("field",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_MULTIPLE,0,NULL)}, +}; + +/* HTTL argument table */ +struct COMMAND_ARG HTTL_Args[] = { +{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("fields",ARG_TYPE_BLOCK,-1,"FIELDS",NULL,NULL,CMD_ARG_NONE,2,NULL),.subargs=HTTL_fields_Subargs}, +}; + /********** HVALS ********************/ #ifndef SKIP_CMD_HISTORY_TABLE @@ -5884,7 +6219,10 @@ struct COMMAND_ARG ACL_CAT_Args[] = { #ifndef SKIP_CMD_TIPS_TABLE /* ACL DELUSER tips */ -#define ACL_DELUSER_Tips NULL +const char *ACL_DELUSER_Tips[] = { +"request_policy:all_nodes", +"response_policy:all_succeeded", +}; #endif #ifndef SKIP_CMD_KEY_SPECS_TABLE @@ -6058,7 +6396,10 @@ struct COMMAND_ARG ACL_LOG_Args[] = { #ifndef SKIP_CMD_TIPS_TABLE /* ACL SAVE tips */ -#define ACL_SAVE_Tips NULL +const char *ACL_SAVE_Tips[] = { +"request_policy:all_nodes", +"response_policy:all_succeeded", +}; #endif #ifndef SKIP_CMD_KEY_SPECS_TABLE @@ -6078,7 +6419,10 @@ commandHistory ACL_SETUSER_History[] = { #ifndef SKIP_CMD_TIPS_TABLE /* ACL SETUSER tips */ -#define ACL_SETUSER_Tips NULL +const char *ACL_SETUSER_Tips[] = { +"request_policy:all_nodes", +"response_policy:all_succeeded", +}; #endif #ifndef SKIP_CMD_KEY_SPECS_TABLE @@ -6129,7 +6473,7 @@ struct COMMAND_ARG ACL_SETUSER_Args[] = { /* ACL command table */ struct COMMAND_STRUCT ACL_Subcommands[] = { {MAKE_CMD("cat","Lists the ACL categories, or the commands inside a category.","O(1) since the categories and commands are a fixed 
set.","6.0.0",CMD_DOC_NONE,NULL,NULL,"server",COMMAND_GROUP_SERVER,ACL_CAT_History,0,ACL_CAT_Tips,0,aclCommand,-2,CMD_NOSCRIPT|CMD_LOADING|CMD_STALE|CMD_SENTINEL,0,ACL_CAT_Keyspecs,0,NULL,1),.args=ACL_CAT_Args}, -{MAKE_CMD("deluser","Deletes ACL users, and terminates their connections.","O(1) amortized time considering the typical user.","6.0.0",CMD_DOC_NONE,NULL,NULL,"server",COMMAND_GROUP_SERVER,ACL_DELUSER_History,0,ACL_DELUSER_Tips,0,aclCommand,-3,CMD_ADMIN|CMD_NOSCRIPT|CMD_LOADING|CMD_STALE|CMD_SENTINEL,0,ACL_DELUSER_Keyspecs,0,NULL,1),.args=ACL_DELUSER_Args}, +{MAKE_CMD("deluser","Deletes ACL users, and terminates their connections.","O(1) amortized time considering the typical user.","6.0.0",CMD_DOC_NONE,NULL,NULL,"server",COMMAND_GROUP_SERVER,ACL_DELUSER_History,0,ACL_DELUSER_Tips,2,aclCommand,-3,CMD_ADMIN|CMD_NOSCRIPT|CMD_LOADING|CMD_STALE|CMD_SENTINEL,0,ACL_DELUSER_Keyspecs,0,NULL,1),.args=ACL_DELUSER_Args}, {MAKE_CMD("dryrun","Simulates the execution of a command by a user, without executing the command.","O(1).","7.0.0",CMD_DOC_NONE,NULL,NULL,"server",COMMAND_GROUP_SERVER,ACL_DRYRUN_History,0,ACL_DRYRUN_Tips,0,aclCommand,-4,CMD_ADMIN|CMD_NOSCRIPT|CMD_LOADING|CMD_STALE|CMD_SENTINEL,0,ACL_DRYRUN_Keyspecs,0,NULL,3),.args=ACL_DRYRUN_Args}, {MAKE_CMD("genpass","Generates a pseudorandom, secure password that can be used to identify ACL users.","O(1)","6.0.0",CMD_DOC_NONE,NULL,NULL,"server",COMMAND_GROUP_SERVER,ACL_GENPASS_History,0,ACL_GENPASS_Tips,0,aclCommand,-2,CMD_NOSCRIPT|CMD_LOADING|CMD_STALE|CMD_SENTINEL,0,ACL_GENPASS_Keyspecs,0,NULL,1),.args=ACL_GENPASS_Args}, {MAKE_CMD("getuser","Lists the ACL rules of a user.","O(N). 
Where N is the number of password, command and pattern rules that the user has.","6.0.0",CMD_DOC_NONE,NULL,NULL,"server",COMMAND_GROUP_SERVER,ACL_GETUSER_History,2,ACL_GETUSER_Tips,0,aclCommand,3,CMD_ADMIN|CMD_NOSCRIPT|CMD_LOADING|CMD_STALE|CMD_SENTINEL,0,ACL_GETUSER_Keyspecs,0,NULL,1),.args=ACL_GETUSER_Args}, @@ -6137,8 +6481,8 @@ struct COMMAND_STRUCT ACL_Subcommands[] = { {MAKE_CMD("list","Dumps the effective rules in ACL file format.","O(N). Where N is the number of configured users.","6.0.0",CMD_DOC_NONE,NULL,NULL,"server",COMMAND_GROUP_SERVER,ACL_LIST_History,0,ACL_LIST_Tips,0,aclCommand,2,CMD_ADMIN|CMD_NOSCRIPT|CMD_LOADING|CMD_STALE|CMD_SENTINEL,0,ACL_LIST_Keyspecs,0,NULL,0)}, {MAKE_CMD("load","Reloads the rules from the configured ACL file.","O(N). Where N is the number of configured users.","6.0.0",CMD_DOC_NONE,NULL,NULL,"server",COMMAND_GROUP_SERVER,ACL_LOAD_History,0,ACL_LOAD_Tips,0,aclCommand,2,CMD_ADMIN|CMD_NOSCRIPT|CMD_LOADING|CMD_STALE|CMD_SENTINEL,0,ACL_LOAD_Keyspecs,0,NULL,0)}, {MAKE_CMD("log","Lists recent security events generated due to ACL rules.","O(N) with N being the number of entries shown.","6.0.0",CMD_DOC_NONE,NULL,NULL,"server",COMMAND_GROUP_SERVER,ACL_LOG_History,1,ACL_LOG_Tips,0,aclCommand,-2,CMD_ADMIN|CMD_NOSCRIPT|CMD_LOADING|CMD_STALE|CMD_SENTINEL,0,ACL_LOG_Keyspecs,0,NULL,1),.args=ACL_LOG_Args}, -{MAKE_CMD("save","Saves the effective ACL rules in the configured ACL file.","O(N). Where N is the number of configured users.","6.0.0",CMD_DOC_NONE,NULL,NULL,"server",COMMAND_GROUP_SERVER,ACL_SAVE_History,0,ACL_SAVE_Tips,0,aclCommand,2,CMD_ADMIN|CMD_NOSCRIPT|CMD_LOADING|CMD_STALE|CMD_SENTINEL,0,ACL_SAVE_Keyspecs,0,NULL,0)}, -{MAKE_CMD("setuser","Creates and modifies an ACL user and its rules.","O(N). 
Where N is the number of rules provided.","6.0.0",CMD_DOC_NONE,NULL,NULL,"server",COMMAND_GROUP_SERVER,ACL_SETUSER_History,2,ACL_SETUSER_Tips,0,aclCommand,-3,CMD_ADMIN|CMD_NOSCRIPT|CMD_LOADING|CMD_STALE|CMD_SENTINEL,0,ACL_SETUSER_Keyspecs,0,NULL,2),.args=ACL_SETUSER_Args}, +{MAKE_CMD("save","Saves the effective ACL rules in the configured ACL file.","O(N). Where N is the number of configured users.","6.0.0",CMD_DOC_NONE,NULL,NULL,"server",COMMAND_GROUP_SERVER,ACL_SAVE_History,0,ACL_SAVE_Tips,2,aclCommand,2,CMD_ADMIN|CMD_NOSCRIPT|CMD_LOADING|CMD_STALE|CMD_SENTINEL,0,ACL_SAVE_Keyspecs,0,NULL,0)}, +{MAKE_CMD("setuser","Creates and modifies an ACL user and its rules.","O(N). Where N is the number of rules provided.","6.0.0",CMD_DOC_NONE,NULL,NULL,"server",COMMAND_GROUP_SERVER,ACL_SETUSER_History,2,ACL_SETUSER_Tips,2,aclCommand,-3,CMD_ADMIN|CMD_NOSCRIPT|CMD_LOADING|CMD_STALE|CMD_SENTINEL,0,ACL_SETUSER_Keyspecs,0,NULL,2),.args=ACL_SETUSER_Args}, {MAKE_CMD("users","Lists all ACL users.","O(N). 
Where N is the number of configured users.","6.0.0",CMD_DOC_NONE,NULL,NULL,"server",COMMAND_GROUP_SERVER,ACL_USERS_History,0,ACL_USERS_Tips,0,aclCommand,2,CMD_ADMIN|CMD_NOSCRIPT|CMD_LOADING|CMD_STALE|CMD_SENTINEL,0,ACL_USERS_Keyspecs,0,NULL,0)}, {MAKE_CMD("whoami","Returns the authenticated username of the current connection.","O(1)","6.0.0",CMD_DOC_NONE,NULL,NULL,"server",COMMAND_GROUP_SERVER,ACL_WHOAMI_History,0,ACL_WHOAMI_Tips,0,aclCommand,2,CMD_NOSCRIPT|CMD_LOADING|CMD_STALE|CMD_SENTINEL,0,ACL_WHOAMI_Keyspecs,0,NULL,0)}, {0} @@ -6444,7 +6788,10 @@ struct COMMAND_ARG CONFIG_GET_Args[] = { #ifndef SKIP_CMD_TIPS_TABLE /* CONFIG RESETSTAT tips */ -#define CONFIG_RESETSTAT_Tips NULL +const char *CONFIG_RESETSTAT_Tips[] = { +"request_policy:all_nodes", +"response_policy:all_succeeded", +}; #endif #ifndef SKIP_CMD_KEY_SPECS_TABLE @@ -6461,7 +6808,10 @@ struct COMMAND_ARG CONFIG_GET_Args[] = { #ifndef SKIP_CMD_TIPS_TABLE /* CONFIG REWRITE tips */ -#define CONFIG_REWRITE_Tips NULL +const char *CONFIG_REWRITE_Tips[] = { +"request_policy:all_nodes", +"response_policy:all_succeeded", +}; #endif #ifndef SKIP_CMD_KEY_SPECS_TABLE @@ -6506,8 +6856,8 @@ struct COMMAND_ARG CONFIG_SET_Args[] = { struct COMMAND_STRUCT CONFIG_Subcommands[] = { {MAKE_CMD("get","Returns the effective values of configuration parameters.","O(N) when N is the number of configuration parameters provided","2.0.0",CMD_DOC_NONE,NULL,NULL,"server",COMMAND_GROUP_SERVER,CONFIG_GET_History,1,CONFIG_GET_Tips,0,configGetCommand,-3,CMD_ADMIN|CMD_NOSCRIPT|CMD_LOADING|CMD_STALE,0,CONFIG_GET_Keyspecs,0,NULL,1),.args=CONFIG_GET_Args}, {MAKE_CMD("help","Returns helpful text about the different subcommands.","O(1)","5.0.0",CMD_DOC_NONE,NULL,NULL,"server",COMMAND_GROUP_SERVER,CONFIG_HELP_History,0,CONFIG_HELP_Tips,0,configHelpCommand,2,CMD_LOADING|CMD_STALE,0,CONFIG_HELP_Keyspecs,0,NULL,0)}, -{MAKE_CMD("resetstat","Resets the server's 
statistics.","O(1)","2.0.0",CMD_DOC_NONE,NULL,NULL,"server",COMMAND_GROUP_SERVER,CONFIG_RESETSTAT_History,0,CONFIG_RESETSTAT_Tips,0,configResetStatCommand,2,CMD_ADMIN|CMD_NOSCRIPT|CMD_LOADING|CMD_STALE,0,CONFIG_RESETSTAT_Keyspecs,0,NULL,0)}, -{MAKE_CMD("rewrite","Persists the effective configuration to file.","O(1)","2.8.0",CMD_DOC_NONE,NULL,NULL,"server",COMMAND_GROUP_SERVER,CONFIG_REWRITE_History,0,CONFIG_REWRITE_Tips,0,configRewriteCommand,2,CMD_ADMIN|CMD_NOSCRIPT|CMD_LOADING|CMD_STALE,0,CONFIG_REWRITE_Keyspecs,0,NULL,0)}, +{MAKE_CMD("resetstat","Resets the server's statistics.","O(1)","2.0.0",CMD_DOC_NONE,NULL,NULL,"server",COMMAND_GROUP_SERVER,CONFIG_RESETSTAT_History,0,CONFIG_RESETSTAT_Tips,2,configResetStatCommand,2,CMD_ADMIN|CMD_NOSCRIPT|CMD_LOADING|CMD_STALE,0,CONFIG_RESETSTAT_Keyspecs,0,NULL,0)}, +{MAKE_CMD("rewrite","Persists the effective configuration to file.","O(1)","2.8.0",CMD_DOC_NONE,NULL,NULL,"server",COMMAND_GROUP_SERVER,CONFIG_REWRITE_History,0,CONFIG_REWRITE_Tips,2,configRewriteCommand,2,CMD_ADMIN|CMD_NOSCRIPT|CMD_LOADING|CMD_STALE,0,CONFIG_REWRITE_Keyspecs,0,NULL,0)}, {MAKE_CMD("set","Sets configuration parameters in-flight.","O(N) when N is the number of configuration parameters provided","2.0.0",CMD_DOC_NONE,NULL,NULL,"server",COMMAND_GROUP_SERVER,CONFIG_SET_History,1,CONFIG_SET_Tips,2,configSetCommand,-4,CMD_ADMIN|CMD_NOSCRIPT|CMD_LOADING|CMD_STALE,0,CONFIG_SET_Keyspecs,0,NULL,1),.args=CONFIG_SET_Args}, {0} }; @@ -6860,7 +7210,7 @@ const char *LATENCY_LATEST_Tips[] = { /* LATENCY RESET tips */ const char *LATENCY_RESET_Tips[] = { "request_policy:all_nodes", -"response_policy:all_succeeded", +"response_policy:agg_sum", }; #endif @@ -7290,12 +7640,29 @@ struct COMMAND_ARG PSYNC_Args[] = { #define REPLICAOF_Keyspecs NULL #endif -/* REPLICAOF argument table */ -struct COMMAND_ARG REPLICAOF_Args[] = { +/* REPLICAOF args host_port argument table */ +struct COMMAND_ARG REPLICAOF_args_host_port_Subargs[] = { 
{MAKE_ARG("host",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, {MAKE_ARG("port",ARG_TYPE_INTEGER,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, }; +/* REPLICAOF args no_one argument table */ +struct COMMAND_ARG REPLICAOF_args_no_one_Subargs[] = { +{MAKE_ARG("no",ARG_TYPE_PURE_TOKEN,-1,"NO",NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("one",ARG_TYPE_PURE_TOKEN,-1,"ONE",NULL,NULL,CMD_ARG_NONE,0,NULL)}, +}; + +/* REPLICAOF args argument table */ +struct COMMAND_ARG REPLICAOF_args_Subargs[] = { +{MAKE_ARG("host-port",ARG_TYPE_BLOCK,-1,NULL,NULL,NULL,CMD_ARG_NONE,2,NULL),.subargs=REPLICAOF_args_host_port_Subargs}, +{MAKE_ARG("no-one",ARG_TYPE_BLOCK,-1,NULL,NULL,NULL,CMD_ARG_NONE,2,NULL),.subargs=REPLICAOF_args_no_one_Subargs}, +}; + +/* REPLICAOF argument table */ +struct COMMAND_ARG REPLICAOF_Args[] = { +{MAKE_ARG("args",ARG_TYPE_ONEOF,-1,NULL,NULL,NULL,CMD_ARG_NONE,2,NULL),.subargs=REPLICAOF_args_Subargs}, +}; + /********** RESTORE_ASKING ********************/ #ifndef SKIP_CMD_HISTORY_TABLE @@ -7414,12 +7781,29 @@ struct COMMAND_ARG SHUTDOWN_Args[] = { #define SLAVEOF_Keyspecs NULL #endif -/* SLAVEOF argument table */ -struct COMMAND_ARG SLAVEOF_Args[] = { +/* SLAVEOF args host_port argument table */ +struct COMMAND_ARG SLAVEOF_args_host_port_Subargs[] = { {MAKE_ARG("host",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, {MAKE_ARG("port",ARG_TYPE_INTEGER,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, }; +/* SLAVEOF args no_one argument table */ +struct COMMAND_ARG SLAVEOF_args_no_one_Subargs[] = { +{MAKE_ARG("no",ARG_TYPE_PURE_TOKEN,-1,"NO",NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("one",ARG_TYPE_PURE_TOKEN,-1,"ONE",NULL,NULL,CMD_ARG_NONE,0,NULL)}, +}; + +/* SLAVEOF args argument table */ +struct COMMAND_ARG SLAVEOF_args_Subargs[] = { +{MAKE_ARG("host-port",ARG_TYPE_BLOCK,-1,NULL,NULL,NULL,CMD_ARG_NONE,2,NULL),.subargs=SLAVEOF_args_host_port_Subargs}, 
+{MAKE_ARG("no-one",ARG_TYPE_BLOCK,-1,NULL,NULL,NULL,CMD_ARG_NONE,2,NULL),.subargs=SLAVEOF_args_no_one_Subargs}, +}; + +/* SLAVEOF argument table */ +struct COMMAND_ARG SLAVEOF_Args[] = { +{MAKE_ARG("args",ARG_TYPE_ONEOF,-1,NULL,NULL,NULL,CMD_ARG_NONE,2,NULL),.subargs=SLAVEOF_args_Subargs}, +}; + /********** SLOWLOG GET ********************/ #ifndef SKIP_CMD_HISTORY_TABLE @@ -7759,7 +8143,7 @@ struct COMMAND_ARG SINTERCARD_Args[] = { #ifndef SKIP_CMD_KEY_SPECS_TABLE /* SINTERSTORE key specs */ keySpec SINTERSTORE_Keyspecs[2] = { -{NULL,CMD_KEY_RW|CMD_KEY_UPDATE,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}},{NULL,CMD_KEY_RO|CMD_KEY_ACCESS,KSPEC_BS_INDEX,.bs.index={2},KSPEC_FK_RANGE,.fk.range={-1,1,0}} +{NULL,CMD_KEY_OW|CMD_KEY_UPDATE,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}},{NULL,CMD_KEY_RO|CMD_KEY_ACCESS,KSPEC_BS_INDEX,.bs.index={2},KSPEC_FK_RANGE,.fk.range={-1,1,0}} }; #endif @@ -9321,7 +9705,7 @@ struct COMMAND_ARG XGROUP_CREATE_Args[] = { {MAKE_ARG("group",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, {MAKE_ARG("id-selector",ARG_TYPE_ONEOF,-1,NULL,NULL,NULL,CMD_ARG_NONE,2,NULL),.subargs=XGROUP_CREATE_id_selector_Subargs}, {MAKE_ARG("mkstream",ARG_TYPE_PURE_TOKEN,-1,"MKSTREAM",NULL,NULL,CMD_ARG_OPTIONAL,0,NULL)}, -{MAKE_ARG("entries-read",ARG_TYPE_INTEGER,-1,"ENTRIESREAD",NULL,NULL,CMD_ARG_OPTIONAL,0,NULL)}, +{MAKE_ARG("entriesread",ARG_TYPE_INTEGER,-1,"ENTRIESREAD",NULL,NULL,CMD_ARG_OPTIONAL,0,NULL),.display_text="entries-read"}, }; /********** XGROUP CREATECONSUMER ********************/ @@ -9486,7 +9870,7 @@ struct COMMAND_STRUCT XGROUP_Subcommands[] = { #ifndef SKIP_CMD_HISTORY_TABLE /* XINFO CONSUMERS history */ commandHistory XINFO_CONSUMERS_History[] = { -{"7.2.0","Added the `inactive` field."}, +{"7.2.0","Added the `inactive` field, and changed the meaning of `idle`."}, }; #endif @@ -10263,10 +10647,7 @@ struct COMMAND_ARG MSET_Args[] = { #ifndef SKIP_CMD_TIPS_TABLE /* MSETNX tips */ -const char 
*MSETNX_Tips[] = { -"request_policy:multi_shard", -"response_policy:agg_min", -}; +#define MSETNX_Tips NULL #endif #ifndef SKIP_CMD_KEY_SPECS_TABLE @@ -10621,33 +11002,36 @@ struct COMMAND_STRUCT redisCommandTable[] = { {MAKE_CMD("pexpireat","Sets the expiration time of a key to a Unix milliseconds timestamp.","O(1)","2.6.0",CMD_DOC_NONE,NULL,NULL,"generic",COMMAND_GROUP_GENERIC,PEXPIREAT_History,1,PEXPIREAT_Tips,0,pexpireatCommand,-3,CMD_WRITE|CMD_FAST,ACL_CATEGORY_KEYSPACE,PEXPIREAT_Keyspecs,1,NULL,3),.args=PEXPIREAT_Args}, {MAKE_CMD("pexpiretime","Returns the expiration time of a key as a Unix milliseconds timestamp.","O(1)","7.0.0",CMD_DOC_NONE,NULL,NULL,"generic",COMMAND_GROUP_GENERIC,PEXPIRETIME_History,0,PEXPIRETIME_Tips,0,pexpiretimeCommand,2,CMD_READONLY|CMD_FAST,ACL_CATEGORY_KEYSPACE,PEXPIRETIME_Keyspecs,1,NULL,1),.args=PEXPIRETIME_Args}, {MAKE_CMD("pttl","Returns the expiration time in milliseconds of a key.","O(1)","2.6.0",CMD_DOC_NONE,NULL,NULL,"generic",COMMAND_GROUP_GENERIC,PTTL_History,1,PTTL_Tips,1,pttlCommand,2,CMD_READONLY|CMD_FAST,ACL_CATEGORY_KEYSPACE,PTTL_Keyspecs,1,NULL,1),.args=PTTL_Args}, -{MAKE_CMD("randomkey","Returns a random key name from the database.","O(1)","1.0.0",CMD_DOC_NONE,NULL,NULL,"generic",COMMAND_GROUP_GENERIC,RANDOMKEY_History,0,RANDOMKEY_Tips,2,randomkeyCommand,1,CMD_READONLY|CMD_TOUCHES_ARBITRARY_KEYS,ACL_CATEGORY_KEYSPACE,RANDOMKEY_Keyspecs,0,NULL,0)}, +{MAKE_CMD("randomkey","Returns a random key name from the database.","O(1)","1.0.0",CMD_DOC_NONE,NULL,NULL,"generic",COMMAND_GROUP_GENERIC,RANDOMKEY_History,0,RANDOMKEY_Tips,3,randomkeyCommand,1,CMD_READONLY|CMD_TOUCHES_ARBITRARY_KEYS,ACL_CATEGORY_KEYSPACE,RANDOMKEY_Keyspecs,0,NULL,0)}, {MAKE_CMD("rename","Renames a key and overwrites the destination.","O(1)","1.0.0",CMD_DOC_NONE,NULL,NULL,"generic",COMMAND_GROUP_GENERIC,RENAME_History,0,RENAME_Tips,0,renameCommand,3,CMD_WRITE,ACL_CATEGORY_KEYSPACE,RENAME_Keyspecs,2,NULL,2),.args=RENAME_Args}, 
{MAKE_CMD("renamenx","Renames a key only when the target key name doesn't exist.","O(1)","1.0.0",CMD_DOC_NONE,NULL,NULL,"generic",COMMAND_GROUP_GENERIC,RENAMENX_History,1,RENAMENX_Tips,0,renamenxCommand,3,CMD_WRITE|CMD_FAST,ACL_CATEGORY_KEYSPACE,RENAMENX_Keyspecs,2,NULL,2),.args=RENAMENX_Args}, {MAKE_CMD("restore","Creates a key from the serialized representation of a value.","O(1) to create the new key and additional O(N*M) to reconstruct the serialized value, where N is the number of Redis objects composing the value and M their average size. For small string values the time complexity is thus O(1)+O(1*M) where M is small, so simply O(1). However for sorted set values the complexity is O(N*M*log(N)) because inserting values into sorted sets is O(log(N)).","2.6.0",CMD_DOC_NONE,NULL,NULL,"generic",COMMAND_GROUP_GENERIC,RESTORE_History,3,RESTORE_Tips,0,restoreCommand,-4,CMD_WRITE|CMD_DENYOOM,ACL_CATEGORY_KEYSPACE|ACL_CATEGORY_DANGEROUS,RESTORE_Keyspecs,1,NULL,7),.args=RESTORE_Args}, -{MAKE_CMD("scan","Iterates over the key names in the database.","O(1) for every call. O(N) for a complete iteration, including enough command calls for the cursor to return back to 0. N is the number of elements inside the collection.","2.8.0",CMD_DOC_NONE,NULL,NULL,"generic",COMMAND_GROUP_GENERIC,SCAN_History,1,SCAN_Tips,2,scanCommand,-2,CMD_READONLY|CMD_TOUCHES_ARBITRARY_KEYS,ACL_CATEGORY_KEYSPACE,SCAN_Keyspecs,0,NULL,4),.args=SCAN_Args}, +{MAKE_CMD("scan","Iterates over the key names in the database.","O(1) for every call. O(N) for a complete iteration, including enough command calls for the cursor to return back to 0. 
N is the number of elements inside the collection.","2.8.0",CMD_DOC_NONE,NULL,NULL,"generic",COMMAND_GROUP_GENERIC,SCAN_History,1,SCAN_Tips,3,scanCommand,-2,CMD_READONLY|CMD_TOUCHES_ARBITRARY_KEYS,ACL_CATEGORY_KEYSPACE,SCAN_Keyspecs,0,NULL,4),.args=SCAN_Args}, {MAKE_CMD("sort","Sorts the elements in a list, a set, or a sorted set, optionally storing the result.","O(N+M*log(M)) where N is the number of elements in the list or set to sort, and M the number of returned elements. When the elements are not sorted, complexity is O(N).","1.0.0",CMD_DOC_NONE,NULL,NULL,"generic",COMMAND_GROUP_GENERIC,SORT_History,0,SORT_Tips,0,sortCommand,-2,CMD_WRITE|CMD_DENYOOM,ACL_CATEGORY_SET|ACL_CATEGORY_SORTEDSET|ACL_CATEGORY_LIST|ACL_CATEGORY_DANGEROUS,SORT_Keyspecs,3,sortGetKeys,7),.args=SORT_Args}, {MAKE_CMD("sort_ro","Returns the sorted elements of a list, a set, or a sorted set.","O(N+M*log(M)) where N is the number of elements in the list or set to sort, and M the number of returned elements. When the elements are not sorted, complexity is O(N).","7.0.0",CMD_DOC_NONE,NULL,NULL,"generic",COMMAND_GROUP_GENERIC,SORT_RO_History,0,SORT_RO_Tips,0,sortroCommand,-2,CMD_READONLY,ACL_CATEGORY_SET|ACL_CATEGORY_SORTEDSET|ACL_CATEGORY_LIST|ACL_CATEGORY_DANGEROUS,SORT_RO_Keyspecs,2,sortROGetKeys,6),.args=SORT_RO_Args}, {MAKE_CMD("touch","Returns the number of existing keys out of those specified after updating the time they were last accessed.","O(N) where N is the number of keys that will be touched.","3.2.1",CMD_DOC_NONE,NULL,NULL,"generic",COMMAND_GROUP_GENERIC,TOUCH_History,0,TOUCH_Tips,2,touchCommand,-2,CMD_READONLY|CMD_FAST,ACL_CATEGORY_KEYSPACE,TOUCH_Keyspecs,1,NULL,1),.args=TOUCH_Args}, {MAKE_CMD("ttl","Returns the expiration time in seconds of a key.","O(1)","1.0.0",CMD_DOC_NONE,NULL,NULL,"generic",COMMAND_GROUP_GENERIC,TTL_History,1,TTL_Tips,1,ttlCommand,2,CMD_READONLY|CMD_FAST,ACL_CATEGORY_KEYSPACE,TTL_Keyspecs,1,NULL,1),.args=TTL_Args}, {MAKE_CMD("type","Determines the type of 
value stored at a key.","O(1)","1.0.0",CMD_DOC_NONE,NULL,NULL,"generic",COMMAND_GROUP_GENERIC,TYPE_History,0,TYPE_Tips,0,typeCommand,2,CMD_READONLY|CMD_FAST,ACL_CATEGORY_KEYSPACE,TYPE_Keyspecs,1,NULL,1),.args=TYPE_Args}, {MAKE_CMD("unlink","Asynchronously deletes one or more keys.","O(1) for each key removed regardless of its size. Then the command does O(N) work in a different thread in order to reclaim memory, where N is the number of allocations the deleted objects where composed of.","4.0.0",CMD_DOC_NONE,NULL,NULL,"generic",COMMAND_GROUP_GENERIC,UNLINK_History,0,UNLINK_Tips,2,unlinkCommand,-2,CMD_WRITE|CMD_FAST,ACL_CATEGORY_KEYSPACE,UNLINK_Keyspecs,1,NULL,1),.args=UNLINK_Args}, -{MAKE_CMD("wait","Blocks until the asynchronous replication of all preceding write commands sent by the connection is completed.","O(1)","3.0.0",CMD_DOC_NONE,NULL,NULL,"generic",COMMAND_GROUP_GENERIC,WAIT_History,0,WAIT_Tips,2,waitCommand,3,0,ACL_CATEGORY_CONNECTION,WAIT_Keyspecs,0,NULL,2),.args=WAIT_Args}, -{MAKE_CMD("waitaof","Blocks until all of the preceding write commands sent by the connection are written to the append-only file of the master and/or replicas.","O(1)","7.2.0",CMD_DOC_NONE,NULL,NULL,"generic",COMMAND_GROUP_GENERIC,WAITAOF_History,0,WAITAOF_Tips,2,waitaofCommand,4,CMD_NOSCRIPT,ACL_CATEGORY_CONNECTION,WAITAOF_Keyspecs,0,NULL,3),.args=WAITAOF_Args}, +{MAKE_CMD("wait","Blocks until the asynchronous replication of all preceding write commands sent by the connection is completed.","O(1)","3.0.0",CMD_DOC_NONE,NULL,NULL,"generic",COMMAND_GROUP_GENERIC,WAIT_History,0,WAIT_Tips,2,waitCommand,3,CMD_BLOCKING,ACL_CATEGORY_CONNECTION,WAIT_Keyspecs,0,NULL,2),.args=WAIT_Args}, +{MAKE_CMD("waitaof","Blocks until all of the preceding write commands sent by the connection are written to the append-only file of the master and/or 
replicas.","O(1)","7.2.0",CMD_DOC_NONE,NULL,NULL,"generic",COMMAND_GROUP_GENERIC,WAITAOF_History,0,WAITAOF_Tips,2,waitaofCommand,4,CMD_BLOCKING,ACL_CATEGORY_CONNECTION,WAITAOF_Keyspecs,0,NULL,3),.args=WAITAOF_Args}, /* geo */ {MAKE_CMD("geoadd","Adds one or more members to a geospatial index. The key is created if it doesn't exist.","O(log(N)) for each item added, where N is the number of elements in the sorted set.","3.2.0",CMD_DOC_NONE,NULL,NULL,"geo",COMMAND_GROUP_GEO,GEOADD_History,1,GEOADD_Tips,0,geoaddCommand,-5,CMD_WRITE|CMD_DENYOOM,ACL_CATEGORY_GEO,GEOADD_Keyspecs,1,NULL,4),.args=GEOADD_Args}, -{MAKE_CMD("geodist","Returns the distance between two members of a geospatial index.","O(log(N))","3.2.0",CMD_DOC_NONE,NULL,NULL,"geo",COMMAND_GROUP_GEO,GEODIST_History,0,GEODIST_Tips,0,geodistCommand,-4,CMD_READONLY,ACL_CATEGORY_GEO,GEODIST_Keyspecs,1,NULL,4),.args=GEODIST_Args}, -{MAKE_CMD("geohash","Returns members from a geospatial index as geohash strings.","O(log(N)) for each member requested, where N is the number of elements in the sorted set.","3.2.0",CMD_DOC_NONE,NULL,NULL,"geo",COMMAND_GROUP_GEO,GEOHASH_History,0,GEOHASH_Tips,0,geohashCommand,-2,CMD_READONLY,ACL_CATEGORY_GEO,GEOHASH_Keyspecs,1,NULL,2),.args=GEOHASH_Args}, -{MAKE_CMD("geopos","Returns the longitude and latitude of members from a geospatial index.","O(N) where N is the number of members requested.","3.2.0",CMD_DOC_NONE,NULL,NULL,"geo",COMMAND_GROUP_GEO,GEOPOS_History,0,GEOPOS_Tips,0,geoposCommand,-2,CMD_READONLY,ACL_CATEGORY_GEO,GEOPOS_Keyspecs,1,NULL,2),.args=GEOPOS_Args}, +{MAKE_CMD("geodist","Returns the distance between two members of a geospatial index.","O(1)","3.2.0",CMD_DOC_NONE,NULL,NULL,"geo",COMMAND_GROUP_GEO,GEODIST_History,0,GEODIST_Tips,0,geodistCommand,-4,CMD_READONLY,ACL_CATEGORY_GEO,GEODIST_Keyspecs,1,NULL,4),.args=GEODIST_Args}, +{MAKE_CMD("geohash","Returns members from a geospatial index as geohash strings.","O(1) for each member 
requested.","3.2.0",CMD_DOC_NONE,NULL,NULL,"geo",COMMAND_GROUP_GEO,GEOHASH_History,0,GEOHASH_Tips,0,geohashCommand,-2,CMD_READONLY,ACL_CATEGORY_GEO,GEOHASH_Keyspecs,1,NULL,2),.args=GEOHASH_Args}, +{MAKE_CMD("geopos","Returns the longitude and latitude of members from a geospatial index.","O(1) for each member requested.","3.2.0",CMD_DOC_NONE,NULL,NULL,"geo",COMMAND_GROUP_GEO,GEOPOS_History,0,GEOPOS_Tips,0,geoposCommand,-2,CMD_READONLY,ACL_CATEGORY_GEO,GEOPOS_Keyspecs,1,NULL,2),.args=GEOPOS_Args}, {MAKE_CMD("georadius","Queries a geospatial index for members within a distance from a coordinate, optionally stores the result.","O(N+log(M)) where N is the number of elements inside the bounding box of the circular area delimited by center and radius and M is the number of items inside the index.","3.2.0",CMD_DOC_DEPRECATED,"`GEOSEARCH` and `GEOSEARCHSTORE` with the `BYRADIUS` argument","6.2.0","geo",COMMAND_GROUP_GEO,GEORADIUS_History,2,GEORADIUS_Tips,0,georadiusCommand,-6,CMD_WRITE|CMD_DENYOOM,ACL_CATEGORY_GEO,GEORADIUS_Keyspecs,3,georadiusGetKeys,11),.args=GEORADIUS_Args}, -{MAKE_CMD("georadiusbymember","Queries a geospatial index for members within a distance from a member, optionally stores the result.","O(N+log(M)) where N is the number of elements inside the bounding box of the circular area delimited by center and radius and M is the number of items inside the index.","3.2.0",CMD_DOC_DEPRECATED,"`GEOSEARCH` and `GEOSEARCHSTORE` with the `BYRADIUS` and `FROMMEMBER` arguments","6.2.0","geo",COMMAND_GROUP_GEO,GEORADIUSBYMEMBER_History,1,GEORADIUSBYMEMBER_Tips,0,georadiusbymemberCommand,-5,CMD_WRITE|CMD_DENYOOM,ACL_CATEGORY_GEO,GEORADIUSBYMEMBER_Keyspecs,3,georadiusGetKeys,10),.args=GEORADIUSBYMEMBER_Args}, -{MAKE_CMD("georadiusbymember_ro","Returns members from a geospatial index that are within a distance from a member.","O(N+log(M)) where N is the number of elements inside the bounding box of the circular area delimited by center and radius and M is the number of 
items inside the index.","3.2.10",CMD_DOC_DEPRECATED,"`GEOSEARCH` with the `BYRADIUS` and `FROMMEMBER` arguments","6.2.0","geo",COMMAND_GROUP_GEO,GEORADIUSBYMEMBER_RO_History,0,GEORADIUSBYMEMBER_RO_Tips,0,georadiusbymemberroCommand,-5,CMD_READONLY,ACL_CATEGORY_GEO,GEORADIUSBYMEMBER_RO_Keyspecs,1,NULL,9),.args=GEORADIUSBYMEMBER_RO_Args}, -{MAKE_CMD("georadius_ro","Returns members from a geospatial index that are within a distance from a coordinate.","O(N+log(M)) where N is the number of elements inside the bounding box of the circular area delimited by center and radius and M is the number of items inside the index.","3.2.10",CMD_DOC_DEPRECATED,"`GEOSEARCH` with the `BYRADIUS` argument","6.2.0","geo",COMMAND_GROUP_GEO,GEORADIUS_RO_History,1,GEORADIUS_RO_Tips,0,georadiusroCommand,-6,CMD_READONLY,ACL_CATEGORY_GEO,GEORADIUS_RO_Keyspecs,1,NULL,10),.args=GEORADIUS_RO_Args}, +{MAKE_CMD("georadiusbymember","Queries a geospatial index for members within a distance from a member, optionally stores the result.","O(N+log(M)) where N is the number of elements inside the bounding box of the circular area delimited by center and radius and M is the number of items inside the index.","3.2.0",CMD_DOC_DEPRECATED,"`GEOSEARCH` and `GEOSEARCHSTORE` with the `BYRADIUS` and `FROMMEMBER` arguments","6.2.0","geo",COMMAND_GROUP_GEO,GEORADIUSBYMEMBER_History,2,GEORADIUSBYMEMBER_Tips,0,georadiusbymemberCommand,-5,CMD_WRITE|CMD_DENYOOM,ACL_CATEGORY_GEO,GEORADIUSBYMEMBER_Keyspecs,3,georadiusGetKeys,10),.args=GEORADIUSBYMEMBER_Args}, +{MAKE_CMD("georadiusbymember_ro","Returns members from a geospatial index that are within a distance from a member.","O(N+log(M)) where N is the number of elements inside the bounding box of the circular area delimited by center and radius and M is the number of items inside the index.","3.2.10",CMD_DOC_DEPRECATED,"`GEOSEARCH` with the `BYRADIUS` and `FROMMEMBER` 
arguments","6.2.0","geo",COMMAND_GROUP_GEO,GEORADIUSBYMEMBER_RO_History,2,GEORADIUSBYMEMBER_RO_Tips,0,georadiusbymemberroCommand,-5,CMD_READONLY,ACL_CATEGORY_GEO,GEORADIUSBYMEMBER_RO_Keyspecs,1,NULL,9),.args=GEORADIUSBYMEMBER_RO_Args}, +{MAKE_CMD("georadius_ro","Returns members from a geospatial index that are within a distance from a coordinate.","O(N+log(M)) where N is the number of elements inside the bounding box of the circular area delimited by center and radius and M is the number of items inside the index.","3.2.10",CMD_DOC_DEPRECATED,"`GEOSEARCH` with the `BYRADIUS` argument","6.2.0","geo",COMMAND_GROUP_GEO,GEORADIUS_RO_History,2,GEORADIUS_RO_Tips,0,georadiusroCommand,-6,CMD_READONLY,ACL_CATEGORY_GEO,GEORADIUS_RO_Keyspecs,1,NULL,10),.args=GEORADIUS_RO_Args}, {MAKE_CMD("geosearch","Queries a geospatial index for members inside an area of a box or a circle.","O(N+log(M)) where N is the number of elements in the grid-aligned bounding box area around the shape provided as the filter and M is the number of items inside the shape","6.2.0",CMD_DOC_NONE,NULL,NULL,"geo",COMMAND_GROUP_GEO,GEOSEARCH_History,1,GEOSEARCH_Tips,0,geosearchCommand,-7,CMD_READONLY,ACL_CATEGORY_GEO,GEOSEARCH_Keyspecs,1,NULL,8),.args=GEOSEARCH_Args}, {MAKE_CMD("geosearchstore","Queries a geospatial index for members inside an area of a box or a circle, optionally stores the result.","O(N+log(M)) where N is the number of elements in the grid-aligned bounding box area around the shape provided as the filter and M is the number of items inside the shape","6.2.0",CMD_DOC_NONE,NULL,NULL,"geo",COMMAND_GROUP_GEO,GEOSEARCHSTORE_History,1,GEOSEARCHSTORE_Tips,0,geosearchstoreCommand,-8,CMD_WRITE|CMD_DENYOOM,ACL_CATEGORY_GEO,GEOSEARCHSTORE_Keyspecs,2,NULL,7),.args=GEOSEARCHSTORE_Args}, /* hash */ {MAKE_CMD("hdel","Deletes one or more fields and their values from a hash. 
Deletes the hash if no fields remain.","O(N) where N is the number of fields to be removed.","2.0.0",CMD_DOC_NONE,NULL,NULL,"hash",COMMAND_GROUP_HASH,HDEL_History,1,HDEL_Tips,0,hdelCommand,-3,CMD_WRITE|CMD_FAST,ACL_CATEGORY_HASH,HDEL_Keyspecs,1,NULL,2),.args=HDEL_Args}, {MAKE_CMD("hexists","Determines whether a field exists in a hash.","O(1)","2.0.0",CMD_DOC_NONE,NULL,NULL,"hash",COMMAND_GROUP_HASH,HEXISTS_History,0,HEXISTS_Tips,0,hexistsCommand,3,CMD_READONLY|CMD_FAST,ACL_CATEGORY_HASH,HEXISTS_Keyspecs,1,NULL,2),.args=HEXISTS_Args}, +{MAKE_CMD("hexpire","Set expiry for hash field using relative time to expire (seconds)","O(N) where N is the number of specified fields","7.4.0",CMD_DOC_NONE,NULL,NULL,"hash",COMMAND_GROUP_HASH,HEXPIRE_History,0,HEXPIRE_Tips,0,hexpireCommand,-6,CMD_WRITE|CMD_DENYOOM|CMD_FAST,ACL_CATEGORY_HASH,HEXPIRE_Keyspecs,1,NULL,4),.args=HEXPIRE_Args}, +{MAKE_CMD("hexpireat","Set expiry for hash field using an absolute Unix timestamp (seconds)","O(N) where N is the number of specified fields","7.4.0",CMD_DOC_NONE,NULL,NULL,"hash",COMMAND_GROUP_HASH,HEXPIREAT_History,0,HEXPIREAT_Tips,0,hexpireatCommand,-6,CMD_WRITE|CMD_DENYOOM|CMD_FAST,ACL_CATEGORY_HASH,HEXPIREAT_Keyspecs,1,NULL,4),.args=HEXPIREAT_Args}, +{MAKE_CMD("hexpiretime","Returns the expiration time of a hash field as a Unix timestamp, in seconds.","O(N) where N is the number of specified fields","7.4.0",CMD_DOC_NONE,NULL,NULL,"hash",COMMAND_GROUP_HASH,HEXPIRETIME_History,0,HEXPIRETIME_Tips,0,hexpiretimeCommand,-5,CMD_READONLY|CMD_FAST,ACL_CATEGORY_HASH,HEXPIRETIME_Keyspecs,1,NULL,2),.args=HEXPIRETIME_Args}, {MAKE_CMD("hget","Returns the value of a field in a hash.","O(1)","2.0.0",CMD_DOC_NONE,NULL,NULL,"hash",COMMAND_GROUP_HASH,HGET_History,0,HGET_Tips,0,hgetCommand,3,CMD_READONLY|CMD_FAST,ACL_CATEGORY_HASH,HGET_Keyspecs,1,NULL,2),.args=HGET_Args}, {MAKE_CMD("hgetall","Returns all fields and values in a hash.","O(N) where N is the size of the 
hash.","2.0.0",CMD_DOC_NONE,NULL,NULL,"hash",COMMAND_GROUP_HASH,HGETALL_History,0,HGETALL_Tips,1,hgetallCommand,2,CMD_READONLY,ACL_CATEGORY_HASH,HGETALL_Keyspecs,1,NULL,1),.args=HGETALL_Args}, {MAKE_CMD("hincrby","Increments the integer value of a field in a hash by a number. Uses 0 as initial value if the field doesn't exist.","O(1)","2.0.0",CMD_DOC_NONE,NULL,NULL,"hash",COMMAND_GROUP_HASH,HINCRBY_History,0,HINCRBY_Tips,0,hincrbyCommand,4,CMD_WRITE|CMD_DENYOOM|CMD_FAST,ACL_CATEGORY_HASH,HINCRBY_Keyspecs,1,NULL,3),.args=HINCRBY_Args}, @@ -10656,11 +11040,17 @@ struct COMMAND_STRUCT redisCommandTable[] = { {MAKE_CMD("hlen","Returns the number of fields in a hash.","O(1)","2.0.0",CMD_DOC_NONE,NULL,NULL,"hash",COMMAND_GROUP_HASH,HLEN_History,0,HLEN_Tips,0,hlenCommand,2,CMD_READONLY|CMD_FAST,ACL_CATEGORY_HASH,HLEN_Keyspecs,1,NULL,1),.args=HLEN_Args}, {MAKE_CMD("hmget","Returns the values of all fields in a hash.","O(N) where N is the number of fields being requested.","2.0.0",CMD_DOC_NONE,NULL,NULL,"hash",COMMAND_GROUP_HASH,HMGET_History,0,HMGET_Tips,0,hmgetCommand,-3,CMD_READONLY|CMD_FAST,ACL_CATEGORY_HASH,HMGET_Keyspecs,1,NULL,2),.args=HMGET_Args}, {MAKE_CMD("hmset","Sets the values of multiple fields.","O(N) where N is the number of fields being set.","2.0.0",CMD_DOC_DEPRECATED,"`HSET` with multiple field-value pairs","4.0.0","hash",COMMAND_GROUP_HASH,HMSET_History,0,HMSET_Tips,0,hsetCommand,-4,CMD_WRITE|CMD_DENYOOM|CMD_FAST,ACL_CATEGORY_HASH,HMSET_Keyspecs,1,NULL,2),.args=HMSET_Args}, +{MAKE_CMD("hpersist","Removes the expiration time for each specified field","O(N) where N is the number of specified fields","7.4.0",CMD_DOC_NONE,NULL,NULL,"hash",COMMAND_GROUP_HASH,HPERSIST_History,0,HPERSIST_Tips,0,hpersistCommand,-5,CMD_WRITE|CMD_FAST,ACL_CATEGORY_HASH,HPERSIST_Keyspecs,1,NULL,2),.args=HPERSIST_Args}, +{MAKE_CMD("hpexpire","Set expiry for hash field using relative time to expire (milliseconds)","O(N) where N is the number of specified 
fields","7.4.0",CMD_DOC_NONE,NULL,NULL,"hash",COMMAND_GROUP_HASH,HPEXPIRE_History,0,HPEXPIRE_Tips,0,hpexpireCommand,-6,CMD_WRITE|CMD_DENYOOM|CMD_FAST,ACL_CATEGORY_HASH,HPEXPIRE_Keyspecs,1,NULL,4),.args=HPEXPIRE_Args}, +{MAKE_CMD("hpexpireat","Set expiry for hash field using an absolute Unix timestamp (milliseconds)","O(N) where N is the number of specified fields","7.4.0",CMD_DOC_NONE,NULL,NULL,"hash",COMMAND_GROUP_HASH,HPEXPIREAT_History,0,HPEXPIREAT_Tips,0,hpexpireatCommand,-6,CMD_WRITE|CMD_DENYOOM|CMD_FAST,ACL_CATEGORY_HASH,HPEXPIREAT_Keyspecs,1,NULL,4),.args=HPEXPIREAT_Args}, +{MAKE_CMD("hpexpiretime","Returns the expiration time of a hash field as a Unix timestamp, in msec.","O(N) where N is the number of specified fields","7.4.0",CMD_DOC_NONE,NULL,NULL,"hash",COMMAND_GROUP_HASH,HPEXPIRETIME_History,0,HPEXPIRETIME_Tips,0,hpexpiretimeCommand,-5,CMD_READONLY|CMD_FAST,ACL_CATEGORY_HASH,HPEXPIRETIME_Keyspecs,1,NULL,2),.args=HPEXPIRETIME_Args}, +{MAKE_CMD("hpttl","Returns the TTL in milliseconds of a hash field.","O(N) where N is the number of specified fields","7.4.0",CMD_DOC_NONE,NULL,NULL,"hash",COMMAND_GROUP_HASH,HPTTL_History,0,HPTTL_Tips,0,hpttlCommand,-5,CMD_READONLY|CMD_FAST,ACL_CATEGORY_HASH,HPTTL_Keyspecs,1,NULL,2),.args=HPTTL_Args}, {MAKE_CMD("hrandfield","Returns one or more random fields from a hash.","O(N) where N is the number of fields returned","6.2.0",CMD_DOC_NONE,NULL,NULL,"hash",COMMAND_GROUP_HASH,HRANDFIELD_History,0,HRANDFIELD_Tips,1,hrandfieldCommand,-2,CMD_READONLY,ACL_CATEGORY_HASH,HRANDFIELD_Keyspecs,1,NULL,2),.args=HRANDFIELD_Args}, -{MAKE_CMD("hscan","Iterates over fields and values of a hash.","O(1) for every call. O(N) for a complete iteration, including enough command calls for the cursor to return back to 0. 
N is the number of elements inside the collection.","2.8.0",CMD_DOC_NONE,NULL,NULL,"hash",COMMAND_GROUP_HASH,HSCAN_History,0,HSCAN_Tips,1,hscanCommand,-3,CMD_READONLY,ACL_CATEGORY_HASH,HSCAN_Keyspecs,1,NULL,4),.args=HSCAN_Args}, +{MAKE_CMD("hscan","Iterates over fields and values of a hash.","O(1) for every call. O(N) for a complete iteration, including enough command calls for the cursor to return back to 0. N is the number of elements inside the collection.","2.8.0",CMD_DOC_NONE,NULL,NULL,"hash",COMMAND_GROUP_HASH,HSCAN_History,0,HSCAN_Tips,1,hscanCommand,-3,CMD_READONLY,ACL_CATEGORY_HASH,HSCAN_Keyspecs,1,NULL,5),.args=HSCAN_Args}, {MAKE_CMD("hset","Creates or modifies the value of a field in a hash.","O(1) for each field/value pair added, so O(N) to add N field/value pairs when the command is called with multiple field/value pairs.","2.0.0",CMD_DOC_NONE,NULL,NULL,"hash",COMMAND_GROUP_HASH,HSET_History,1,HSET_Tips,0,hsetCommand,-4,CMD_WRITE|CMD_DENYOOM|CMD_FAST,ACL_CATEGORY_HASH,HSET_Keyspecs,1,NULL,2),.args=HSET_Args}, {MAKE_CMD("hsetnx","Sets the value of a field in a hash only when the field doesn't exist.","O(1)","2.0.0",CMD_DOC_NONE,NULL,NULL,"hash",COMMAND_GROUP_HASH,HSETNX_History,0,HSETNX_Tips,0,hsetnxCommand,4,CMD_WRITE|CMD_DENYOOM|CMD_FAST,ACL_CATEGORY_HASH,HSETNX_Keyspecs,1,NULL,3),.args=HSETNX_Args}, {MAKE_CMD("hstrlen","Returns the length of the value of a field.","O(1)","3.2.0",CMD_DOC_NONE,NULL,NULL,"hash",COMMAND_GROUP_HASH,HSTRLEN_History,0,HSTRLEN_Tips,0,hstrlenCommand,3,CMD_READONLY|CMD_FAST,ACL_CATEGORY_HASH,HSTRLEN_Keyspecs,1,NULL,2),.args=HSTRLEN_Args}, +{MAKE_CMD("httl","Returns the TTL in seconds of a hash field.","O(N) where N is the number of specified fields","7.4.0",CMD_DOC_NONE,NULL,NULL,"hash",COMMAND_GROUP_HASH,HTTL_History,0,HTTL_Tips,0,httlCommand,-5,CMD_READONLY|CMD_FAST,ACL_CATEGORY_HASH,HTTL_Keyspecs,1,NULL,2),.args=HTTL_Args}, {MAKE_CMD("hvals","Returns all values in a hash.","O(N) where N is the size of the 
hash.","2.0.0",CMD_DOC_NONE,NULL,NULL,"hash",COMMAND_GROUP_HASH,HVALS_History,0,HVALS_Tips,1,hvalsCommand,2,CMD_READONLY,ACL_CATEGORY_HASH,HVALS_Keyspecs,1,NULL,1),.args=HVALS_Args}, /* hyperloglog */ {MAKE_CMD("pfadd","Adds elements to a HyperLogLog key. Creates the key if it doesn't exist.","O(1) to add every element.","2.8.9",CMD_DOC_NONE,NULL,NULL,"hyperloglog",COMMAND_GROUP_HYPERLOGLOG,PFADD_History,0,PFADD_Tips,0,pfaddCommand,-2,CMD_WRITE|CMD_DENYOOM|CMD_FAST,ACL_CATEGORY_HYPERLOGLOG,PFADD_Keyspecs,1,NULL,2),.args=PFADD_Args}, @@ -10732,12 +11122,12 @@ struct COMMAND_STRUCT redisCommandTable[] = { {MAKE_CMD("monitor","Listens for all requests received by the server in real-time.",NULL,"1.0.0",CMD_DOC_NONE,NULL,NULL,"server",COMMAND_GROUP_SERVER,MONITOR_History,0,MONITOR_Tips,0,monitorCommand,1,CMD_ADMIN|CMD_NOSCRIPT|CMD_LOADING|CMD_STALE,0,MONITOR_Keyspecs,0,NULL,0)}, {MAKE_CMD("psync","An internal command used in replication.",NULL,"2.8.0",CMD_DOC_NONE,NULL,NULL,"server",COMMAND_GROUP_SERVER,PSYNC_History,0,PSYNC_Tips,0,syncCommand,-3,CMD_NO_ASYNC_LOADING|CMD_ADMIN|CMD_NO_MULTI|CMD_NOSCRIPT,0,PSYNC_Keyspecs,0,NULL,2),.args=PSYNC_Args}, {MAKE_CMD("replconf","An internal command for configuring the replication stream.","O(1)","3.0.0",CMD_DOC_SYSCMD,NULL,NULL,"server",COMMAND_GROUP_SERVER,REPLCONF_History,0,REPLCONF_Tips,0,replconfCommand,-1,CMD_ADMIN|CMD_NOSCRIPT|CMD_LOADING|CMD_STALE|CMD_ALLOW_BUSY,0,REPLCONF_Keyspecs,0,NULL,0)}, -{MAKE_CMD("replicaof","Configures a server as replica of another, or promotes it to a master.","O(1)","5.0.0",CMD_DOC_NONE,NULL,NULL,"server",COMMAND_GROUP_SERVER,REPLICAOF_History,0,REPLICAOF_Tips,0,replicaofCommand,3,CMD_NO_ASYNC_LOADING|CMD_ADMIN|CMD_NOSCRIPT|CMD_STALE,0,REPLICAOF_Keyspecs,0,NULL,2),.args=REPLICAOF_Args}, +{MAKE_CMD("replicaof","Configures a server as replica of another, or promotes it to a 
master.","O(1)","5.0.0",CMD_DOC_NONE,NULL,NULL,"server",COMMAND_GROUP_SERVER,REPLICAOF_History,0,REPLICAOF_Tips,0,replicaofCommand,3,CMD_NO_ASYNC_LOADING|CMD_ADMIN|CMD_NOSCRIPT|CMD_STALE,0,REPLICAOF_Keyspecs,0,NULL,1),.args=REPLICAOF_Args}, {MAKE_CMD("restore-asking","An internal command for migrating keys in a cluster.","O(1) to create the new key and additional O(N*M) to reconstruct the serialized value, where N is the number of Redis objects composing the value and M their average size. For small string values the time complexity is thus O(1)+O(1*M) where M is small, so simply O(1). However for sorted set values the complexity is O(N*M*log(N)) because inserting values into sorted sets is O(log(N)).","3.0.0",CMD_DOC_SYSCMD,NULL,NULL,"server",COMMAND_GROUP_SERVER,RESTORE_ASKING_History,3,RESTORE_ASKING_Tips,0,restoreCommand,-4,CMD_WRITE|CMD_DENYOOM|CMD_ASKING,ACL_CATEGORY_KEYSPACE|ACL_CATEGORY_DANGEROUS,RESTORE_ASKING_Keyspecs,1,NULL,7),.args=RESTORE_ASKING_Args}, {MAKE_CMD("role","Returns the replication role.","O(1)","2.8.12",CMD_DOC_NONE,NULL,NULL,"server",COMMAND_GROUP_SERVER,ROLE_History,0,ROLE_Tips,0,roleCommand,1,CMD_NOSCRIPT|CMD_LOADING|CMD_STALE|CMD_FAST|CMD_SENTINEL,ACL_CATEGORY_ADMIN|ACL_CATEGORY_DANGEROUS,ROLE_Keyspecs,0,NULL,0)}, {MAKE_CMD("save","Synchronously saves the database(s) to disk.","O(N) where N is the total number of keys in all databases","1.0.0",CMD_DOC_NONE,NULL,NULL,"server",COMMAND_GROUP_SERVER,SAVE_History,0,SAVE_Tips,0,saveCommand,1,CMD_NO_ASYNC_LOADING|CMD_ADMIN|CMD_NOSCRIPT|CMD_NO_MULTI,0,SAVE_Keyspecs,0,NULL,0)}, {MAKE_CMD("shutdown","Synchronously saves the database(s) to disk and shuts down the Redis server.","O(N) when saving, where N is the total number of keys in all databases when saving data, otherwise 
O(1)","1.0.0",CMD_DOC_NONE,NULL,NULL,"server",COMMAND_GROUP_SERVER,SHUTDOWN_History,1,SHUTDOWN_Tips,0,shutdownCommand,-1,CMD_ADMIN|CMD_NOSCRIPT|CMD_LOADING|CMD_STALE|CMD_NO_MULTI|CMD_SENTINEL|CMD_ALLOW_BUSY,0,SHUTDOWN_Keyspecs,0,NULL,4),.args=SHUTDOWN_Args}, -{MAKE_CMD("slaveof","Sets a Redis server as a replica of another, or promotes it to being a master.","O(1)","1.0.0",CMD_DOC_DEPRECATED,"`REPLICAOF`","5.0.0","server",COMMAND_GROUP_SERVER,SLAVEOF_History,0,SLAVEOF_Tips,0,replicaofCommand,3,CMD_NO_ASYNC_LOADING|CMD_ADMIN|CMD_NOSCRIPT|CMD_STALE,0,SLAVEOF_Keyspecs,0,NULL,2),.args=SLAVEOF_Args}, +{MAKE_CMD("slaveof","Sets a Redis server as a replica of another, or promotes it to being a master.","O(1)","1.0.0",CMD_DOC_DEPRECATED,"`REPLICAOF`","5.0.0","server",COMMAND_GROUP_SERVER,SLAVEOF_History,0,SLAVEOF_Tips,0,replicaofCommand,3,CMD_NO_ASYNC_LOADING|CMD_ADMIN|CMD_NOSCRIPT|CMD_STALE,0,SLAVEOF_Keyspecs,0,NULL,1),.args=SLAVEOF_Args}, {MAKE_CMD("slowlog","A container for slow log commands.","Depends on subcommand.","2.2.12",CMD_DOC_NONE,NULL,NULL,"server",COMMAND_GROUP_SERVER,SLOWLOG_History,0,SLOWLOG_Tips,0,NULL,-2,0,0,SLOWLOG_Keyspecs,0,NULL,0),.subcommands=SLOWLOG_Subcommands}, {MAKE_CMD("swapdb","Swaps two Redis databases.","O(N) where N is the count of clients watching or blocking on keys from both databases.","4.0.0",CMD_DOC_NONE,NULL,NULL,"server",COMMAND_GROUP_SERVER,SWAPDB_History,0,SWAPDB_Tips,0,swapdbCommand,3,CMD_WRITE|CMD_FAST,ACL_CATEGORY_KEYSPACE|ACL_CATEGORY_DANGEROUS,SWAPDB_Keyspecs,0,NULL,2),.args=SWAPDB_Args}, {MAKE_CMD("sync","An internal command used in replication.",NULL,"1.0.0",CMD_DOC_NONE,NULL,NULL,"server",COMMAND_GROUP_SERVER,SYNC_History,0,SYNC_Tips,0,syncCommand,1,CMD_NO_ASYNC_LOADING|CMD_ADMIN|CMD_NO_MULTI|CMD_NOSCRIPT,0,SYNC_Keyspecs,0,NULL,0)}, @@ -10807,7 +11197,7 @@ struct COMMAND_STRUCT redisCommandTable[] = { {MAKE_CMD("xlen","Return the number of messages in a 
stream.","O(1)","5.0.0",CMD_DOC_NONE,NULL,NULL,"stream",COMMAND_GROUP_STREAM,XLEN_History,0,XLEN_Tips,0,xlenCommand,2,CMD_READONLY|CMD_FAST,ACL_CATEGORY_STREAM,XLEN_Keyspecs,1,NULL,1),.args=XLEN_Args}, {MAKE_CMD("xpending","Returns the information and entries from a stream consumer group's pending entries list.","O(N) with N being the number of elements returned, so asking for a small fixed number of entries per call is O(1). O(M), where M is the total number of entries scanned when used with the IDLE filter. When the command returns just the summary and the list of consumers is small, it runs in O(1) time; otherwise, an additional O(N) time for iterating every consumer.","5.0.0",CMD_DOC_NONE,NULL,NULL,"stream",COMMAND_GROUP_STREAM,XPENDING_History,1,XPENDING_Tips,1,xpendingCommand,-3,CMD_READONLY,ACL_CATEGORY_STREAM,XPENDING_Keyspecs,1,NULL,3),.args=XPENDING_Args}, {MAKE_CMD("xrange","Returns the messages from a stream within a range of IDs.","O(N) with N being the number of elements being returned. If N is constant (e.g. always asking for the first 10 elements with COUNT), you can consider it O(1).","5.0.0",CMD_DOC_NONE,NULL,NULL,"stream",COMMAND_GROUP_STREAM,XRANGE_History,1,XRANGE_Tips,0,xrangeCommand,-4,CMD_READONLY,ACL_CATEGORY_STREAM,XRANGE_Keyspecs,1,NULL,4),.args=XRANGE_Args}, -{MAKE_CMD("xread","Returns messages from multiple streams with IDs greater than the ones requested. Blocks until a message is available otherwise.",NULL,"5.0.0",CMD_DOC_NONE,NULL,NULL,"stream",COMMAND_GROUP_STREAM,XREAD_History,0,XREAD_Tips,0,xreadCommand,-4,CMD_BLOCKING|CMD_READONLY|CMD_BLOCKING,ACL_CATEGORY_STREAM,XREAD_Keyspecs,1,xreadGetKeys,3),.args=XREAD_Args}, +{MAKE_CMD("xread","Returns messages from multiple streams with IDs greater than the ones requested. 
Blocks until a message is available otherwise.",NULL,"5.0.0",CMD_DOC_NONE,NULL,NULL,"stream",COMMAND_GROUP_STREAM,XREAD_History,0,XREAD_Tips,0,xreadCommand,-4,CMD_BLOCKING|CMD_READONLY,ACL_CATEGORY_STREAM,XREAD_Keyspecs,1,xreadGetKeys,3),.args=XREAD_Args}, {MAKE_CMD("xreadgroup","Returns new or historical messages from a stream for a consumer in a group. Blocks until a message is available otherwise.","For each stream mentioned: O(M) with M being the number of elements returned. If M is constant (e.g. always asking for the first 10 elements with COUNT), you can consider it O(1). On the other side when XREADGROUP blocks, XADD will pay the O(N) time in order to serve the N clients blocked on the stream getting new data.","5.0.0",CMD_DOC_NONE,NULL,NULL,"stream",COMMAND_GROUP_STREAM,XREADGROUP_History,0,XREADGROUP_Tips,0,xreadCommand,-7,CMD_BLOCKING|CMD_WRITE,ACL_CATEGORY_STREAM,XREADGROUP_Keyspecs,1,xreadGetKeys,5),.args=XREADGROUP_Args}, {MAKE_CMD("xrevrange","Returns the messages from a stream within a range of IDs in reverse order.","O(N) with N being the number of elements returned. If N is constant (e.g. 
always asking for the first 10 elements with COUNT), you can consider it O(1).","5.0.0",CMD_DOC_NONE,NULL,NULL,"stream",COMMAND_GROUP_STREAM,XREVRANGE_History,1,XREVRANGE_Tips,0,xrevrangeCommand,-4,CMD_READONLY,ACL_CATEGORY_STREAM,XREVRANGE_Keyspecs,1,NULL,4),.args=XREVRANGE_Args}, {MAKE_CMD("xsetid","An internal command for replicating stream values.","O(1)","5.0.0",CMD_DOC_NONE,NULL,NULL,"stream",COMMAND_GROUP_STREAM,XSETID_History,1,XSETID_Tips,0,xsetidCommand,-3,CMD_WRITE|CMD_DENYOOM|CMD_FAST,ACL_CATEGORY_STREAM,XSETID_Keyspecs,1,NULL,4),.args=XSETID_Args}, @@ -10827,7 +11217,7 @@ struct COMMAND_STRUCT redisCommandTable[] = { {MAKE_CMD("lcs","Finds the longest common substring.","O(N*M) where N and M are the lengths of s1 and s2, respectively","7.0.0",CMD_DOC_NONE,NULL,NULL,"string",COMMAND_GROUP_STRING,LCS_History,0,LCS_Tips,0,lcsCommand,-3,CMD_READONLY,ACL_CATEGORY_STRING,LCS_Keyspecs,1,NULL,6),.args=LCS_Args}, {MAKE_CMD("mget","Atomically returns the string values of one or more keys.","O(N) where N is the number of keys to retrieve.","1.0.0",CMD_DOC_NONE,NULL,NULL,"string",COMMAND_GROUP_STRING,MGET_History,0,MGET_Tips,1,mgetCommand,-2,CMD_READONLY|CMD_FAST,ACL_CATEGORY_STRING,MGET_Keyspecs,1,NULL,1),.args=MGET_Args}, {MAKE_CMD("mset","Atomically creates or modifies the string values of one or more keys.","O(N) where N is the number of keys to set.","1.0.1",CMD_DOC_NONE,NULL,NULL,"string",COMMAND_GROUP_STRING,MSET_History,0,MSET_Tips,2,msetCommand,-3,CMD_WRITE|CMD_DENYOOM,ACL_CATEGORY_STRING,MSET_Keyspecs,1,NULL,1),.args=MSET_Args}, -{MAKE_CMD("msetnx","Atomically modifies the string values of one or more keys only when all keys don't exist.","O(N) where N is the number of keys to set.","1.0.1",CMD_DOC_NONE,NULL,NULL,"string",COMMAND_GROUP_STRING,MSETNX_History,0,MSETNX_Tips,2,msetnxCommand,-3,CMD_WRITE|CMD_DENYOOM,ACL_CATEGORY_STRING,MSETNX_Keyspecs,1,NULL,1),.args=MSETNX_Args}, +{MAKE_CMD("msetnx","Atomically modifies the string values of one or more keys 
only when all keys don't exist.","O(N) where N is the number of keys to set.","1.0.1",CMD_DOC_NONE,NULL,NULL,"string",COMMAND_GROUP_STRING,MSETNX_History,0,MSETNX_Tips,0,msetnxCommand,-3,CMD_WRITE|CMD_DENYOOM,ACL_CATEGORY_STRING,MSETNX_Keyspecs,1,NULL,1),.args=MSETNX_Args}, {MAKE_CMD("psetex","Sets both string value and expiration time in milliseconds of a key. The key is created if it doesn't exist.","O(1)","2.6.0",CMD_DOC_DEPRECATED,"`SET` with the `PX` argument","2.6.12","string",COMMAND_GROUP_STRING,PSETEX_History,0,PSETEX_Tips,0,psetexCommand,4,CMD_WRITE|CMD_DENYOOM,ACL_CATEGORY_STRING,PSETEX_Keyspecs,1,NULL,3),.args=PSETEX_Args}, {MAKE_CMD("set","Sets the string value of a key, ignoring its type. The key is created if it doesn't exist.","O(1)","1.0.0",CMD_DOC_NONE,NULL,NULL,"string",COMMAND_GROUP_STRING,SET_History,4,SET_Tips,0,setCommand,-3,CMD_WRITE|CMD_DENYOOM,ACL_CATEGORY_STRING,SET_Keyspecs,1,setGetKeys,5),.args=SET_Args}, {MAKE_CMD("setex","Sets the string value and expiration time of a key. Creates the key if it doesn't exist.","O(1)","2.0.0",CMD_DOC_DEPRECATED,"`SET` with the `EX` argument","2.6.12","string",COMMAND_GROUP_STRING,SETEX_History,0,SETEX_Tips,0,setexCommand,4,CMD_WRITE|CMD_DENYOOM,ACL_CATEGORY_STRING,SETEX_Keyspecs,1,NULL,3),.args=SETEX_Args}, diff --git a/src/commands.h b/src/commands.h index 52acacfe0b1..1eefab4812b 100644 --- a/src/commands.h +++ b/src/commands.h @@ -19,7 +19,7 @@ typedef enum { #define CMD_ARG_MULTIPLE (1<<1) #define CMD_ARG_MULTIPLE_TOKEN (1<<2) -/* WARNING! This struct must match RedisModuleCommandArg */ +/* Must be compatible with RedisModuleCommandArg. See moduleCopyCommandArgs. */ typedef struct redisCommandArg { const char *name; redisCommandArgType type; diff --git a/src/commands/README.md b/src/commands/README.md new file mode 100644 index 00000000000..5ef9a3f2905 --- /dev/null +++ b/src/commands/README.md @@ -0,0 +1,15 @@ +This directory contains JSON files, one for each of Redis commands. 
+ +Each JSON contains all the information about the command itself, but these JSON files are not to be used directly! +Any third party who needs access to command information must get it from `COMMAND INFO` and `COMMAND DOCS`. +The output can be extracted in a JSON format by using `redis-cli --json`, in the same manner as in `utils/generate-commands-json.py`. + +The JSON files are used to generate commands.def (and https://github.com/redis/redis-doc/blob/master/commands.json) in Redis, and +despite looking similar to the output of `COMMAND` there are some fields and flags that are implicitly populated, and that's the +reason one shouldn't rely on the raw files. + +The structure of each JSON is somewhat documented in https://redis.io/commands/command-docs/ and https://redis.io/commands/command/ + +The `reply_schema` section is a standard JSON Schema (see https://json-schema.org/) that describes the reply of each command. +It is designed to someday be used to auto-generate code in client libraries, but is not yet mature and is not exposed externally. 
+ diff --git a/src/commands/acl-deluser.json b/src/commands/acl-deluser.json index 4fc106503d4..80e8a7ad51b 100644 --- a/src/commands/acl-deluser.json +++ b/src/commands/acl-deluser.json @@ -14,6 +14,10 @@ "STALE", "SENTINEL" ], + "command_tips": [ + "REQUEST_POLICY:ALL_NODES", + "RESPONSE_POLICY:ALL_SUCCEEDED" + ], "reply_schema": { "type": "integer", "description": "The number of users that were deleted" diff --git a/src/commands/acl-save.json b/src/commands/acl-save.json index 0b2af21e650..98d8dfd34bf 100644 --- a/src/commands/acl-save.json +++ b/src/commands/acl-save.json @@ -14,6 +14,10 @@ "STALE", "SENTINEL" ], + "command_tips": [ + "REQUEST_POLICY:ALL_NODES", + "RESPONSE_POLICY:ALL_SUCCEEDED" + ], "reply_schema": { "const": "OK" } diff --git a/src/commands/acl-setuser.json b/src/commands/acl-setuser.json index e26df464fe5..1a909170f1b 100644 --- a/src/commands/acl-setuser.json +++ b/src/commands/acl-setuser.json @@ -24,6 +24,10 @@ "STALE", "SENTINEL" ], + "command_tips": [ + "REQUEST_POLICY:ALL_NODES", + "RESPONSE_POLICY:ALL_SUCCEEDED" + ], "reply_schema": { "const": "OK" }, diff --git a/src/commands/client-kill.json b/src/commands/client-kill.json index bd0262d4e99..17f848cdae3 100644 --- a/src/commands/client-kill.json +++ b/src/commands/client-kill.json @@ -27,6 +27,10 @@ [ "6.2.0", "`LADDR` option." + ], + [ + "7.4.0", + "`MAXAGE` option." 
] ], "command_flags": [ @@ -136,6 +140,13 @@ "token": "NO" } ] + }, + { + "token": "MAXAGE", + "name": "maxage", + "type": "integer", + "optional": true, + "since": "7.4.0" } ] } diff --git a/src/commands/client-setinfo.json b/src/commands/client-setinfo.json index e61ba56645d..d0d8f7318f9 100644 --- a/src/commands/client-setinfo.json +++ b/src/commands/client-setinfo.json @@ -13,6 +13,10 @@ "STALE", "SENTINEL" ], + "command_tips": [ + "REQUEST_POLICY:ALL_NODES", + "RESPONSE_POLICY:ALL_SUCCEEDED" + ], "acl_categories": [ "CONNECTION" ], diff --git a/src/commands/client-setname.json b/src/commands/client-setname.json index e8920b686bc..b071bd18ff9 100644 --- a/src/commands/client-setname.json +++ b/src/commands/client-setname.json @@ -13,6 +13,10 @@ "STALE", "SENTINEL" ], + "command_tips": [ + "REQUEST_POLICY:ALL_NODES", + "RESPONSE_POLICY:ALL_SUCCEEDED" + ], "acl_categories": [ "CONNECTION" ], diff --git a/src/commands/cluster-replicas.json b/src/commands/cluster-replicas.json index 49a9227705e..e01617feebf 100644 --- a/src/commands/cluster-replicas.json +++ b/src/commands/cluster-replicas.json @@ -1,7 +1,7 @@ { "REPLICAS": { "summary": "Lists the replica nodes of a master node.", - "complexity": "O(1)", + "complexity": "O(N) where N is the number of replicas.", "group": "cluster", "since": "5.0.0", "arity": 3, diff --git a/src/commands/cluster-shards.json b/src/commands/cluster-shards.json index dcaad3ea3eb..e7a08295347 100644 --- a/src/commands/cluster-shards.json +++ b/src/commands/cluster-shards.json @@ -26,7 +26,7 @@ "description": "an even number element array specifying the start and end slot numbers for slot ranges owned by this shard", "type": "array", "items": { - "type": "string" + "type": "integer" } }, "nodes": { diff --git a/src/commands/cluster-slaves.json b/src/commands/cluster-slaves.json index a2e6755a0a0..a736088e4c9 100644 --- a/src/commands/cluster-slaves.json +++ b/src/commands/cluster-slaves.json @@ -1,7 +1,7 @@ { "SLAVES": { "summary": 
"Lists the replica nodes of a master node.", - "complexity": "O(1)", + "complexity": "O(N) where N is the number of replicas.", "group": "cluster", "since": "3.0.0", "arity": 3, diff --git a/src/commands/config-resetstat.json b/src/commands/config-resetstat.json index 87a08972a5d..fd6701f0d9e 100644 --- a/src/commands/config-resetstat.json +++ b/src/commands/config-resetstat.json @@ -13,6 +13,10 @@ "LOADING", "STALE" ], + "command_tips": [ + "REQUEST_POLICY:ALL_NODES", + "RESPONSE_POLICY:ALL_SUCCEEDED" + ], "reply_schema": { "const": "OK" } diff --git a/src/commands/config-rewrite.json b/src/commands/config-rewrite.json index 490e2f8e870..af49dd770f8 100644 --- a/src/commands/config-rewrite.json +++ b/src/commands/config-rewrite.json @@ -13,6 +13,10 @@ "LOADING", "STALE" ], + "command_tips": [ + "REQUEST_POLICY:ALL_NODES", + "RESPONSE_POLICY:ALL_SUCCEEDED" + ], "reply_schema": { "const": "OK" } diff --git a/src/commands/geodist.json b/src/commands/geodist.json index 97969d332c7..145ca718a87 100644 --- a/src/commands/geodist.json +++ b/src/commands/geodist.json @@ -1,7 +1,7 @@ { "GEODIST": { "summary": "Returns the distance between two members of a geospatial index.", - "complexity": "O(log(N))", + "complexity": "O(1)", "group": "geo", "since": "3.2.0", "arity": -4, diff --git a/src/commands/geohash.json b/src/commands/geohash.json index 8f4d55a62b2..01402c4657a 100644 --- a/src/commands/geohash.json +++ b/src/commands/geohash.json @@ -1,7 +1,7 @@ { "GEOHASH": { "summary": "Returns members from a geospatial index as geohash strings.", - "complexity": "O(log(N)) for each member requested, where N is the number of elements in the sorted set.", + "complexity": "O(1) for each member requested.", "group": "geo", "since": "3.2.0", "arity": -2, diff --git a/src/commands/geopos.json b/src/commands/geopos.json index 5473c1b76fa..408b6e6a39f 100644 --- a/src/commands/geopos.json +++ b/src/commands/geopos.json @@ -1,7 +1,7 @@ { "GEOPOS": { "summary": "Returns the longitude and 
latitude of members from a geospatial index.", - "complexity": "O(N) where N is the number of members requested.", + "complexity": "O(1) for each member requested.", "group": "geo", "since": "3.2.0", "arity": -2, diff --git a/src/commands/georadius_ro.json b/src/commands/georadius_ro.json index 964246a2094..b3d335d4a1f 100644 --- a/src/commands/georadius_ro.json +++ b/src/commands/georadius_ro.json @@ -10,6 +10,10 @@ [ "6.2.0", "Added the `ANY` option for `COUNT`." + ], + [ + "7.0.0", + "Added support for uppercase unit names." ] ], "deprecated_since": "6.2.0", diff --git a/src/commands/georadiusbymember.json b/src/commands/georadiusbymember.json index 4b627419baa..6102a1b163e 100644 --- a/src/commands/georadiusbymember.json +++ b/src/commands/georadiusbymember.json @@ -8,6 +8,10 @@ "function": "georadiusbymemberCommand", "get_keys_function": "georadiusGetKeys", "history": [ + [ + "6.2.0", + "Added the `ANY` option for `COUNT`." + ], [ "7.0.0", "Added support for uppercase unit names." diff --git a/src/commands/georadiusbymember_ro.json b/src/commands/georadiusbymember_ro.json index 59258819171..0cc599feff9 100644 --- a/src/commands/georadiusbymember_ro.json +++ b/src/commands/georadiusbymember_ro.json @@ -6,6 +6,16 @@ "since": "3.2.10", "arity": -5, "function": "georadiusbymemberroCommand", + "history": [ + [ + "6.2.0", + "Added the `ANY` option for `COUNT`." + ], + [ + "7.0.0", + "Added support for uppercase unit names." 
+ ] + ], "deprecated_since": "6.2.0", "replaced_by": "`GEOSEARCH` with the `BYRADIUS` and `FROMMEMBER` arguments", "doc_flags": [ diff --git a/src/commands/hexpire.json b/src/commands/hexpire.json new file mode 100644 index 00000000000..832c182aea2 --- /dev/null +++ b/src/commands/hexpire.json @@ -0,0 +1,119 @@ +{ + "HEXPIRE": { + "summary": "Set expiry for hash field using relative time to expire (seconds)", + "complexity": "O(N) where N is the number of specified fields", + "group": "hash", + "since": "7.4.0", + "arity": -6, + "function": "hexpireCommand", + "history": [], + "command_flags": [ + "WRITE", + "DENYOOM", + "FAST" + ], + "acl_categories": [ + "HASH" + ], + "key_specs": [ + { + "flags": [ + "RW", + "UPDATE" + ], + "begin_search": { + "index": { + "pos": 1 + } + }, + "find_keys": { + "range": { + "lastkey": 0, + "step": 1, + "limit": 0 + } + } + } + ], + "reply_schema": { + "description": "Array of results. Returns empty array if the key does not exist.", + "type": "array", + "minItems": 0, + "maxItems": 4294967295, + "items": { + "oneOf": [ + { + "description": "The field does not exist.", + "const": -2 + }, + { + "description": "Specified NX | XX | GT | LT condition not met", + "const": 0 + }, + { + "description": "Expiration time was set or updated.", + "const": 1 + }, + { + "description": "Field deleted because the specified expiration time is in the past.", + "const": 2 + } + ] + } + }, + "arguments": [ + { + "name": "key", + "type": "key", + "key_spec_index": 0 + }, + { + "name": "seconds", + "type": "integer" + }, + { + "name": "condition", + "type": "oneof", + "optional": true, + "arguments": [ + { + "name": "nx", + "type": "pure-token", + "token": "NX" + }, + { + "name": "xx", + "type": "pure-token", + "token": "XX" + }, + { + "name": "gt", + "type": "pure-token", + "token": "GT" + }, + { + "name": "lt", + "type": "pure-token", + "token": "LT" + } + ] + }, + { + "name": "fields", + "token": "FIELDS", + "type": "block", + "arguments": [ + { + 
"name": "numfields", + "type": "integer" + }, + { + "name": "field", + "type": "string", + "multiple": true + } + ] + } + ] + } +} diff --git a/src/commands/hexpireat.json b/src/commands/hexpireat.json new file mode 100644 index 00000000000..4a7c0c71886 --- /dev/null +++ b/src/commands/hexpireat.json @@ -0,0 +1,119 @@ +{ + "HEXPIREAT": { + "summary": "Set expiry for hash field using an absolute Unix timestamp (seconds)", + "complexity": "O(N) where N is the number of specified fields", + "group": "hash", + "since": "7.4.0", + "arity": -6, + "function": "hexpireatCommand", + "history": [], + "command_flags": [ + "WRITE", + "DENYOOM", + "FAST" + ], + "acl_categories": [ + "HASH" + ], + "key_specs": [ + { + "flags": [ + "RW", + "UPDATE" + ], + "begin_search": { + "index": { + "pos": 1 + } + }, + "find_keys": { + "range": { + "lastkey": 0, + "step": 1, + "limit": 0 + } + } + } + ], + "reply_schema": { + "description": "Array of results. Returns empty array if the key does not exist.", + "type": "array", + "minItems": 0, + "maxItems": 4294967295, + "items": { + "oneOf": [ + { + "description": "The field does not exist.", + "const": -2 + }, + { + "description": "Specified NX | XX | GT | LT condition not met", + "const": 0 + }, + { + "description": "Expiration time was set or updated.", + "const": 1 + }, + { + "description": "Field deleted because the specified expiration time is in the past.", + "const": 2 + } + ] + } + }, + "arguments": [ + { + "name": "key", + "type": "key", + "key_spec_index": 0 + }, + { + "name": "unix-time-seconds", + "type": "unix-time" + }, + { + "name": "condition", + "type": "oneof", + "optional": true, + "arguments": [ + { + "name": "nx", + "type": "pure-token", + "token": "NX" + }, + { + "name": "xx", + "type": "pure-token", + "token": "XX" + }, + { + "name": "gt", + "type": "pure-token", + "token": "GT" + }, + { + "name": "lt", + "type": "pure-token", + "token": "LT" + } + ] + }, + { + "name": "fields", + "token": "FIELDS", + "type": "block", 
+ "arguments": [ + { + "name": "numfields", + "type": "integer" + }, + { + "name": "field", + "type": "string", + "multiple": true + } + ] + } + ] + } +} \ No newline at end of file diff --git a/src/commands/hexpiretime.json b/src/commands/hexpiretime.json new file mode 100644 index 00000000000..28c1e5f4baa --- /dev/null +++ b/src/commands/hexpiretime.json @@ -0,0 +1,84 @@ +{ + "HEXPIRETIME": { + "summary": "Returns the expiration time of a hash field as a Unix timestamp, in seconds.", + "complexity": "O(N) where N is the number of specified fields", + "group": "hash", + "since": "7.4.0", + "arity": -5, + "function": "hexpiretimeCommand", + "history": [], + "command_flags": [ + "READONLY", + "FAST" + ], + "acl_categories": [ + "HASH" + ], + "key_specs": [ + { + "flags": [ + "RO", + "ACCESS" + ], + "begin_search": { + "index": { + "pos": 1 + } + }, + "find_keys": { + "range": { + "lastkey": 0, + "step": 1, + "limit": 0 + } + } + } + ], + "reply_schema": { + "description": "Array of results. 
Returns empty array if the key does not exist.", + "type": "array", + "minItems": 0, + "maxItems": 4294967295, + "items": { + "oneOf": [ + { + "description": "The field does not exist.", + "const": -2 + }, + { + "description": "The field exists but has no associated expire.", + "const": -1 + }, + { + "description": "Expiration Unix timestamp in seconds.", + "type": "integer", + "minimum": 1 + } + ] + } + }, + "arguments": [ + { + "name": "key", + "type": "key", + "key_spec_index": 0 + }, + { + "name": "fields", + "token": "FIELDS", + "type": "block", + "arguments": [ + { + "name": "numfields", + "type": "integer" + }, + { + "name": "field", + "type": "string", + "multiple": true + } + ] + } + ] + } +} diff --git a/src/commands/hpersist.json b/src/commands/hpersist.json new file mode 100644 index 00000000000..e7c1cb11bc9 --- /dev/null +++ b/src/commands/hpersist.json @@ -0,0 +1,83 @@ +{ + "HPERSIST": { + "summary": "Removes the expiration time for each specified field", + "complexity": "O(N) where N is the number of specified fields", + "group": "hash", + "since": "7.4.0", + "arity": -5, + "function": "hpersistCommand", + "history": [], + "command_flags": [ + "WRITE", + "FAST" + ], + "acl_categories": [ + "HASH" + ], + "key_specs": [ + { + "flags": [ + "RW", + "UPDATE" + ], + "begin_search": { + "index": { + "pos": 1 + } + }, + "find_keys": { + "range": { + "lastkey": 0, + "step": 1, + "limit": 0 + } + } + } + ], + "reply_schema": { + "description": "Array of results. 
Returns empty array if the key does not exist.", + "type": "array", + "minItems": 0, + "maxItems": 4294967295, + "items": { + "oneOf": [ + { + "description": "The field does not exist.", + "const": -2 + }, + { + "description": "The field exists but has no associated expire.", + "const": -1 + }, + { + "description": "Expiration time was removed", + "const": 1 + } + ] + } + }, + "arguments": [ + { + "name": "key", + "type": "key", + "key_spec_index": 0 + }, + { + "name": "fields", + "token": "FIELDS", + "type": "block", + "arguments": [ + { + "name": "numfields", + "type": "integer" + }, + { + "name": "field", + "type": "string", + "multiple": true + } + ] + } + ] + } +} diff --git a/src/commands/hpexpire.json b/src/commands/hpexpire.json new file mode 100644 index 00000000000..02c68e61634 --- /dev/null +++ b/src/commands/hpexpire.json @@ -0,0 +1,119 @@ +{ + "HPEXPIRE": { + "summary": "Set expiry for hash field using relative time to expire (milliseconds)", + "complexity": "O(N) where N is the number of specified fields", + "group": "hash", + "since": "7.4.0", + "arity": -6, + "function": "hpexpireCommand", + "history": [], + "command_flags": [ + "WRITE", + "DENYOOM", + "FAST" + ], + "acl_categories": [ + "HASH" + ], + "key_specs": [ + { + "flags": [ + "RW", + "UPDATE" + ], + "begin_search": { + "index": { + "pos": 1 + } + }, + "find_keys": { + "range": { + "lastkey": 0, + "step": 1, + "limit": 0 + } + } + } + ], + "reply_schema": { + "description": "Array of results. 
Returns empty array if the key does not exist.", + "type": "array", + "minItems": 0, + "maxItems": 4294967295, + "items": { + "oneOf": [ + { + "description": "The field does not exist.", + "const": -2 + }, + { + "description": "Specified NX | XX | GT | LT condition not met", + "const": 0 + }, + { + "description": "Expiration time was set or updated.", + "const": 1 + }, + { + "description": "Field deleted because the specified expiration time is in the past.", + "const": 2 + } + ] + } + }, + "arguments": [ + { + "name": "key", + "type": "key", + "key_spec_index": 0 + }, + { + "name": "milliseconds", + "type": "integer" + }, + { + "name": "condition", + "type": "oneof", + "optional": true, + "arguments": [ + { + "name": "nx", + "type": "pure-token", + "token": "NX" + }, + { + "name": "xx", + "type": "pure-token", + "token": "XX" + }, + { + "name": "gt", + "type": "pure-token", + "token": "GT" + }, + { + "name": "lt", + "type": "pure-token", + "token": "LT" + } + ] + }, + { + "name": "fields", + "token": "FIELDS", + "type": "block", + "arguments": [ + { + "name": "numfields", + "type": "integer" + }, + { + "name": "field", + "type": "string", + "multiple": true + } + ] + } + ] + } +} \ No newline at end of file diff --git a/src/commands/hpexpireat.json b/src/commands/hpexpireat.json new file mode 100644 index 00000000000..58e5555fb5f --- /dev/null +++ b/src/commands/hpexpireat.json @@ -0,0 +1,119 @@ +{ + "HPEXPIREAT": { + "summary": "Set expiry for hash field using an absolute Unix timestamp (milliseconds)", + "complexity": "O(N) where N is the number of specified fields", + "group": "hash", + "since": "7.4.0", + "arity": -6, + "function": "hpexpireatCommand", + "history": [], + "command_flags": [ + "WRITE", + "DENYOOM", + "FAST" + ], + "acl_categories": [ + "HASH" + ], + "key_specs": [ + { + "flags": [ + "RW", + "UPDATE" + ], + "begin_search": { + "index": { + "pos": 1 + } + }, + "find_keys": { + "range": { + "lastkey": 0, + "step": 1, + "limit": 0 + } + } + } + ], + 
"reply_schema": { + "description": "Array of results. Returns empty array if the key does not exist.", + "type": "array", + "minItems": 0, + "maxItems": 4294967295, + "items": { + "oneOf": [ + { + "description": "The field does not exist.", + "const": -2 + }, + { + "description": "Specified NX | XX | GT | LT condition not met", + "const": 0 + }, + { + "description": "Expiration time was set or updated.", + "const": 1 + }, + { + "description": "Field deleted because the specified expiration time is in the past.", + "const": 2 + } + ] + } + }, + "arguments": [ + { + "name": "key", + "type": "key", + "key_spec_index": 0 + }, + { + "name": "unix-time-milliseconds", + "type": "unix-time" + }, + { + "name": "condition", + "type": "oneof", + "optional": true, + "arguments": [ + { + "name": "nx", + "type": "pure-token", + "token": "NX" + }, + { + "name": "xx", + "type": "pure-token", + "token": "XX" + }, + { + "name": "gt", + "type": "pure-token", + "token": "GT" + }, + { + "name": "lt", + "type": "pure-token", + "token": "LT" + } + ] + }, + { + "name": "fields", + "token": "FIELDS", + "type": "block", + "arguments": [ + { + "name": "numfields", + "type": "integer" + }, + { + "name": "field", + "type": "string", + "multiple": true + } + ] + } + ] + } +} \ No newline at end of file diff --git a/src/commands/hpexpiretime.json b/src/commands/hpexpiretime.json new file mode 100644 index 00000000000..67406cb7dad --- /dev/null +++ b/src/commands/hpexpiretime.json @@ -0,0 +1,84 @@ +{ + "HPEXPIRETIME": { + "summary": "Returns the expiration time of a hash field as a Unix timestamp, in msec.", + "complexity": "O(N) where N is the number of specified fields", + "group": "hash", + "since": "7.4.0", + "arity": -5, + "function": "hpexpiretimeCommand", + "history": [], + "command_flags": [ + "READONLY", + "FAST" + ], + "acl_categories": [ + "HASH" + ], + "key_specs": [ + { + "flags": [ + "RO", + "ACCESS" + ], + "begin_search": { + "index": { + "pos": 1 + } + }, + "find_keys": { + 
"range": { + "lastkey": 0, + "step": 1, + "limit": 0 + } + } + } + ], + "reply_schema": { + "description": "Array of results. Returns empty array if the key does not exist.", + "type": "array", + "minItems": 0, + "maxItems": 4294967295, + "items": { + "oneOf": [ + { + "description": "The field does not exist.", + "const": -2 + }, + { + "description": "The field exists but has no associated expire.", + "const": -1 + }, + { + "description": "Expiration Unix timestamp in milliseconds.", + "type": "integer", + "minimum": 1 + } + ] + } + }, + "arguments": [ + { + "name": "key", + "type": "key", + "key_spec_index": 0 + }, + { + "name": "fields", + "token": "FIELDS", + "type": "block", + "arguments": [ + { + "name": "numfields", + "type": "integer" + }, + { + "name": "field", + "type": "string", + "multiple": true + } + ] + } + ] + } +} diff --git a/src/commands/hpttl.json b/src/commands/hpttl.json new file mode 100644 index 00000000000..9f24bec8f3e --- /dev/null +++ b/src/commands/hpttl.json @@ -0,0 +1,84 @@ +{ + "HPTTL": { + "summary": "Returns the TTL in milliseconds of a hash field.", + "complexity": "O(N) where N is the number of specified fields", + "group": "hash", + "since": "7.4.0", + "arity": -5, + "function": "hpttlCommand", + "history": [], + "command_flags": [ + "READONLY", + "FAST" + ], + "acl_categories": [ + "HASH" + ], + "key_specs": [ + { + "flags": [ + "RO", + "ACCESS" + ], + "begin_search": { + "index": { + "pos": 1 + } + }, + "find_keys": { + "range": { + "lastkey": 0, + "step": 1, + "limit": 0 + } + } + } + ], + "reply_schema": { + "description": "Array of results. 
Returns empty array if the key does not exist.", + "type": "array", + "minItems": 0, + "maxItems": 4294967295, + "items": { + "oneOf": [ + { + "description": "The field does not exist.", + "const": -2 + }, + { + "description": "The field exists but has no associated expire.", + "const": -1 + }, + { + "description": "TTL in milliseconds.", + "type": "integer", + "minimum": 1 + } + ] + } + }, + "arguments": [ + { + "name": "key", + "type": "key", + "key_spec_index": 0 + }, + { + "name": "fields", + "token": "FIELDS", + "type": "block", + "arguments": [ + { + "name": "numfields", + "type": "integer" + }, + { + "name": "field", + "type": "string", + "multiple": true + } + ] + } + ] + } +} diff --git a/src/commands/hscan.json b/src/commands/hscan.json index 0888eec9fce..99e916574f6 100644 --- a/src/commands/hscan.json +++ b/src/commands/hscan.json @@ -56,6 +56,12 @@ "name": "count", "type": "integer", "optional": true + }, + { + "token": "NOVALUES", + "name": "novalues", + "type": "pure-token", + "optional": true } ], "reply_schema": { @@ -69,7 +75,7 @@ "type": "string" }, { - "description": "list of key/value pairs from the hash where each even element is the key, and each odd element is the value", + "description": "list of key/value pairs from the hash where each even element is the key, and each odd element is the value, or when novalues option is on, a list of keys from the hash", "type": "array", "items": { "type": "string" diff --git a/src/commands/httl.json b/src/commands/httl.json new file mode 100644 index 00000000000..e0e865056af --- /dev/null +++ b/src/commands/httl.json @@ -0,0 +1,84 @@ +{ + "HTTL": { + "summary": "Returns the TTL in seconds of a hash field.", + "complexity": "O(N) where N is the number of specified fields", + "group": "hash", + "since": "7.4.0", + "arity": -5, + "function": "httlCommand", + "history": [], + "command_flags": [ + "READONLY", + "FAST" + ], + "acl_categories": [ + "HASH" + ], + "key_specs": [ + { + "flags": [ + "RO", + 
"ACCESS" + ], + "begin_search": { + "index": { + "pos": 1 + } + }, + "find_keys": { + "range": { + "lastkey": 0, + "step": 1, + "limit": 0 + } + } + } + ], + "reply_schema": { + "description": "Array of results. Returns empty array if the key does not exist.", + "type": "array", + "minItems": 0, + "maxItems": 4294967295, + "items": { + "oneOf": [ + { + "description": "The field does not exist.", + "const": -2 + }, + { + "description": "The field exists but has no associated expire.", + "const": -1 + }, + { + "description": "TTL in seconds.", + "type": "integer", + "minimum": 1 + } + ] + } + }, + "arguments": [ + { + "name": "key", + "type": "key", + "key_spec_index": 0 + }, + { + "name": "fields", + "token": "FIELDS", + "type": "block", + "arguments": [ + { + "name": "numfields", + "type": "integer" + }, + { + "name": "field", + "type": "string", + "multiple": true + } + ] + } + ] + } +} diff --git a/src/commands/latency-reset.json b/src/commands/latency-reset.json index d4891da5edc..322328277c7 100644 --- a/src/commands/latency-reset.json +++ b/src/commands/latency-reset.json @@ -15,7 +15,7 @@ ], "command_tips": [ "REQUEST_POLICY:ALL_NODES", - "RESPONSE_POLICY:ALL_SUCCEEDED" + "RESPONSE_POLICY:AGG_SUM" ], "reply_schema": { "type": "integer", diff --git a/src/commands/memory-stats.json b/src/commands/memory-stats.json index de82dc8cc59..98e49b7d271 100644 --- a/src/commands/memory-stats.json +++ b/src/commands/memory-stats.json @@ -47,9 +47,18 @@ "functions.caches": { "type": "integer" }, + "overhead.db.hashtable.lut": { + "type": "integer" + }, + "overhead.db.hashtable.rehashing": { + "type": "integer" + }, "overhead.total": { "type": "integer" }, + "db.dict.rehashing.count": { + "type": "integer" + }, "keys.count": { "type": "integer" }, @@ -74,6 +83,9 @@ "allocator.resident": { "type": "integer" }, + "allocator.muzzy": { + "type": "integer" + }, "allocator-fragmentation.ratio": { "type": "number" }, @@ -100,7 +112,7 @@ } }, "patternProperties": { - "^db.": { + 
"^db\\.\\d+$": { "type": "object", "properties": { "overhead.hashtable.main": { @@ -108,9 +120,6 @@ }, "overhead.hashtable.expires": { "type": "integer" - }, - "overhead.hashtable.slot-to-keys": { - "type": "integer" } }, "additionalProperties": false diff --git a/src/commands/msetnx.json b/src/commands/msetnx.json index fa71d2b45bc..27592d3044a 100644 --- a/src/commands/msetnx.json +++ b/src/commands/msetnx.json @@ -13,10 +13,6 @@ "acl_categories": [ "STRING" ], - "command_tips": [ - "REQUEST_POLICY:MULTI_SHARD", - "RESPONSE_POLICY:AGG_MIN" - ], "key_specs": [ { "flags": [ diff --git a/src/commands/randomkey.json b/src/commands/randomkey.json index e8773ee6b01..eeef61aef17 100644 --- a/src/commands/randomkey.json +++ b/src/commands/randomkey.json @@ -15,6 +15,7 @@ ], "command_tips": [ "REQUEST_POLICY:ALL_SHARDS", + "RESPONSE_POLICY:SPECIAL", "NONDETERMINISTIC_OUTPUT" ], "reply_schema": { diff --git a/src/commands/replicaof.json b/src/commands/replicaof.json index aa49390197c..95e5cb400b8 100644 --- a/src/commands/replicaof.json +++ b/src/commands/replicaof.json @@ -14,12 +14,40 @@ ], "arguments": [ { - "name": "host", - "type": "string" - }, - { - "name": "port", - "type": "integer" + "name": "args", + "type": "oneof", + "arguments": [ + { + "name": "host-port", + "type": "block", + "arguments": [ + { + "name": "host", + "type": "string" + }, + { + "name": "port", + "type": "integer" + } + ] + }, + { + "name": "no-one", + "type": "block", + "arguments": [ + { + "name": "no", + "type": "pure-token", + "token": "NO" + }, + { + "name": "one", + "type": "pure-token", + "token": "ONE" + } + ] + } + ] } ], "reply_schema": { diff --git a/src/commands/scan.json b/src/commands/scan.json index ca9adf5b44b..a7df78a218b 100644 --- a/src/commands/scan.json +++ b/src/commands/scan.json @@ -21,7 +21,8 @@ ], "command_tips": [ "NONDETERMINISTIC_OUTPUT", - "REQUEST_POLICY:SPECIAL" + "REQUEST_POLICY:SPECIAL", + "RESPONSE_POLICY:SPECIAL" ], "arguments": [ { diff --git 
a/src/commands/sinterstore.json b/src/commands/sinterstore.json index 28ccfff691e..e8e4bb44746 100644 --- a/src/commands/sinterstore.json +++ b/src/commands/sinterstore.json @@ -16,7 +16,7 @@ "key_specs": [ { "flags": [ - "RW", + "OW", "UPDATE" ], "begin_search": { diff --git a/src/commands/slaveof.json b/src/commands/slaveof.json index 9790730b25e..6595960f949 100644 --- a/src/commands/slaveof.json +++ b/src/commands/slaveof.json @@ -19,12 +19,40 @@ ], "arguments": [ { - "name": "host", - "type": "string" - }, - { - "name": "port", - "type": "integer" + "name": "args", + "type": "oneof", + "arguments": [ + { + "name": "host-port", + "type": "block", + "arguments": [ + { + "name": "host", + "type": "string" + }, + { + "name": "port", + "type": "integer" + } + ] + }, + { + "name": "no-one", + "type": "block", + "arguments": [ + { + "name": "no", + "type": "pure-token", + "token": "NO" + }, + { + "name": "one", + "type": "pure-token", + "token": "ONE" + } + ] + } + ] } ], "reply_schema": { diff --git a/src/commands/sort.json b/src/commands/sort.json index 5e117c9501a..d5f6511142d 100644 --- a/src/commands/sort.json +++ b/src/commands/sort.json @@ -150,7 +150,7 @@ "type": "string" }, { - "description": "GET option is specified, but no object was found ", + "description": "GET option is specified, but no object was found", "type": "null" } ] diff --git a/src/commands/sort_ro.json b/src/commands/sort_ro.json index 8b32b17fa1f..04cc3c8417a 100644 --- a/src/commands/sort_ro.json +++ b/src/commands/sort_ro.json @@ -117,7 +117,15 @@ "description": "a list of sorted elements", "type": "array", "items": { - "type": "string" + "oneOf": [ + { + "type": "string" + }, + { + "description": "GET option is specified, but no object was found", + "type": "null" + } + ] } } } diff --git a/src/commands/wait.json b/src/commands/wait.json index f936b924218..cb82f64956c 100644 --- a/src/commands/wait.json +++ b/src/commands/wait.json @@ -7,6 +7,7 @@ "arity": 3, "function": "waitCommand", 
"command_flags": [ + "BLOCKING" ], "acl_categories": [ "CONNECTION" diff --git a/src/commands/waitaof.json b/src/commands/waitaof.json index 735a8f261f0..19b514c2741 100644 --- a/src/commands/waitaof.json +++ b/src/commands/waitaof.json @@ -7,7 +7,7 @@ "arity": 4, "function": "waitaofCommand", "command_flags": [ - "NOSCRIPT" + "BLOCKING" ], "acl_categories": [ "CONNECTION" diff --git a/src/commands/xgroup-create.json b/src/commands/xgroup-create.json index 6b11a1f002c..119d7f300bd 100644 --- a/src/commands/xgroup-create.json +++ b/src/commands/xgroup-create.json @@ -72,8 +72,9 @@ "optional": true }, { + "name": "entriesread", + "display": "entries-read", "token": "ENTRIESREAD", - "name": "entries-read", "type": "integer", "optional": true } diff --git a/src/commands/xinfo-consumers.json b/src/commands/xinfo-consumers.json index b507e8e5926..8713a60b0f1 100644 --- a/src/commands/xinfo-consumers.json +++ b/src/commands/xinfo-consumers.json @@ -10,7 +10,7 @@ "history": [ [ "7.2.0", - "Added the `inactive` field." + "Added the `inactive` field, and changed the meaning of `idle`." 
] ], "command_flags": [ diff --git a/src/commands/xinfo-stream.json b/src/commands/xinfo-stream.json index 018826f91a6..609dc336d56 100644 --- a/src/commands/xinfo-stream.json +++ b/src/commands/xinfo-stream.json @@ -292,7 +292,8 @@ }, "seen-time": { "description": "timestamp of the last interaction attempt of the consumer", - "type": "integer" + "type": "integer", + "minimum": 0 }, "pel-count": { "description": "number of unacknowledged entries that belong to the consumer", diff --git a/src/commands/xread.json b/src/commands/xread.json index 3a78ffb224e..95e22c494e1 100644 --- a/src/commands/xread.json +++ b/src/commands/xread.json @@ -8,8 +8,7 @@ "get_keys_function": "xreadGetKeys", "command_flags": [ "BLOCKING", - "READONLY", - "BLOCKING" + "READONLY" ], "acl_categories": [ "STREAM" diff --git a/src/config.c b/src/config.c index b26704283f0..e1868e0b2ea 100644 --- a/src/config.c +++ b/src/config.c @@ -1,31 +1,10 @@ /* Configuration file parsing and CONFIG GET/SET commands implementation. * - * Copyright (c) 2009-2012, Salvatore Sanfilippo + * Copyright (c) 2009-Present, Redis Ltd. * All rights reserved. * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Redis nor the names of its contributors may be used - * to endorse or promote products derived from this software without - * specific prior written permission. 
- * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. + * Licensed under your choice of the Redis Source Available License 2.0 + * (RSALv2) or the Server Side Public License v1 (SSPLv1). */ #include "server.h" @@ -1120,12 +1099,22 @@ struct rewriteConfigState *rewriteConfigReadOldFile(char *path) { if (fp == NULL && errno != ENOENT) return NULL; struct redis_stat sb; - if (fp && redis_fstat(fileno(fp),&sb) == -1) return NULL; + if (fp && redis_fstat(fileno(fp),&sb) == -1) { + fclose(fp); + return NULL; + } int linenum = -1; struct rewriteConfigState *state = rewriteConfigCreateState(); - if (fp == NULL || sb.st_size == 0) return state; + if (fp == NULL) { + return state; + } + + if (sb.st_size == 0) { + fclose(fp); + return state; + } /* Load the file content */ sds config = sdsnewlen(SDS_NOINIT,sb.st_size); @@ -2387,7 +2376,7 @@ static int isValidShutdownOnSigFlags(int val, const char **err) { static int isValidAnnouncedNodename(char *val,const char **err) { if (!(isValidAuxString(val,sdslen(val)))) { *err = "Announced human node name contained invalid character"; - return 0; + return 0; } return 1; } @@ -2468,6 +2457,12 @@ static int updatePort(const char **err) { return 1; } +static int updateDefragConfiguration(const char 
**err) { + UNUSED(err); + server.active_defrag_configuration_changed = 1; + return 1; +} + static int updateJemallocBgThread(const char **err) { UNUSED(err); set_jemalloc_bg_thread(server.jemalloc_bg_thread); @@ -2528,9 +2523,9 @@ static int updateAofAutoGCEnabled(const char **err) { static int updateSighandlerEnabled(const char **err) { UNUSED(err); if (server.crashlog_enabled) - setupSignalHandlers(); + setupSigSegvHandler(); else - removeSignalHandlers(); + removeSigSegvHandlers(); return 1; } @@ -3110,10 +3105,10 @@ standardConfig static_configs[] = { createStringConfig("dbfilename", NULL, MODIFIABLE_CONFIG | PROTECTED_CONFIG, ALLOW_EMPTY_STRING, server.rdb_filename, "dump.rdb", isValidDBfilename, NULL), createStringConfig("appendfilename", NULL, IMMUTABLE_CONFIG, ALLOW_EMPTY_STRING, server.aof_filename, "appendonly.aof", isValidAOFfilename, NULL), createStringConfig("appenddirname", NULL, IMMUTABLE_CONFIG, ALLOW_EMPTY_STRING, server.aof_dirname, "appendonlydir", isValidAOFdirname, NULL), - createStringConfig("server_cpulist", NULL, IMMUTABLE_CONFIG, EMPTY_STRING_IS_NULL, server.server_cpulist, NULL, NULL, NULL), - createStringConfig("bio_cpulist", NULL, IMMUTABLE_CONFIG, EMPTY_STRING_IS_NULL, server.bio_cpulist, NULL, NULL, NULL), - createStringConfig("aof_rewrite_cpulist", NULL, IMMUTABLE_CONFIG, EMPTY_STRING_IS_NULL, server.aof_rewrite_cpulist, NULL, NULL, NULL), - createStringConfig("bgsave_cpulist", NULL, IMMUTABLE_CONFIG, EMPTY_STRING_IS_NULL, server.bgsave_cpulist, NULL, NULL, NULL), + createStringConfig("server-cpulist", "server_cpulist", IMMUTABLE_CONFIG, EMPTY_STRING_IS_NULL, server.server_cpulist, NULL, NULL, NULL), + createStringConfig("bio-cpulist", "bio_cpulist", IMMUTABLE_CONFIG, EMPTY_STRING_IS_NULL, server.bio_cpulist, NULL, NULL, NULL), + createStringConfig("aof-rewrite-cpulist", "aof_rewrite_cpulist", IMMUTABLE_CONFIG, EMPTY_STRING_IS_NULL, server.aof_rewrite_cpulist, NULL, NULL, NULL), + createStringConfig("bgsave-cpulist", "bgsave_cpulist", 
IMMUTABLE_CONFIG, EMPTY_STRING_IS_NULL, server.bgsave_cpulist, NULL, NULL, NULL), createStringConfig("ignore-warnings", NULL, MODIFIABLE_CONFIG, ALLOW_EMPTY_STRING, server.ignore_warnings, "", NULL, NULL), createStringConfig("proc-title-template", NULL, MODIFIABLE_CONFIG, ALLOW_EMPTY_STRING, server.proc_title_template, CONFIG_DEFAULT_PROC_TITLE_TEMPLATE, isValidProcTitleTemplate, updateProcTitleTemplate), createStringConfig("bind-source-addr", NULL, MODIFIABLE_CONFIG, EMPTY_STRING_IS_NULL, server.bind_source_addr, NULL, NULL, NULL), @@ -3154,15 +3149,15 @@ standardConfig static_configs[] = { createIntConfig("list-max-listpack-size", "list-max-ziplist-size", MODIFIABLE_CONFIG, INT_MIN, INT_MAX, server.list_max_listpack_size, -2, INTEGER_CONFIG, NULL, NULL), createIntConfig("tcp-keepalive", NULL, MODIFIABLE_CONFIG, 0, INT_MAX, server.tcpkeepalive, 300, INTEGER_CONFIG, NULL, NULL), createIntConfig("cluster-migration-barrier", NULL, MODIFIABLE_CONFIG, 0, INT_MAX, server.cluster_migration_barrier, 1, INTEGER_CONFIG, NULL, NULL), - createIntConfig("active-defrag-cycle-min", NULL, MODIFIABLE_CONFIG, 1, 99, server.active_defrag_cycle_min, 1, INTEGER_CONFIG, NULL, NULL), /* Default: 1% CPU min (at lower threshold) */ - createIntConfig("active-defrag-cycle-max", NULL, MODIFIABLE_CONFIG, 1, 99, server.active_defrag_cycle_max, 25, INTEGER_CONFIG, NULL, NULL), /* Default: 25% CPU max (at upper threshold) */ + createIntConfig("active-defrag-cycle-min", NULL, MODIFIABLE_CONFIG, 1, 99, server.active_defrag_cycle_min, 1, INTEGER_CONFIG, NULL, updateDefragConfiguration), /* Default: 1% CPU min (at lower threshold) */ + createIntConfig("active-defrag-cycle-max", NULL, MODIFIABLE_CONFIG, 1, 99, server.active_defrag_cycle_max, 25, INTEGER_CONFIG, NULL, updateDefragConfiguration), /* Default: 25% CPU max (at upper threshold) */ createIntConfig("active-defrag-threshold-lower", NULL, MODIFIABLE_CONFIG, 0, 1000, server.active_defrag_threshold_lower, 10, INTEGER_CONFIG, NULL, NULL), /* 
Default: don't defrag when fragmentation is below 10% */ - createIntConfig("active-defrag-threshold-upper", NULL, MODIFIABLE_CONFIG, 0, 1000, server.active_defrag_threshold_upper, 100, INTEGER_CONFIG, NULL, NULL), /* Default: maximum defrag force at 100% fragmentation */ + createIntConfig("active-defrag-threshold-upper", NULL, MODIFIABLE_CONFIG, 0, 1000, server.active_defrag_threshold_upper, 100, INTEGER_CONFIG, NULL, updateDefragConfiguration), /* Default: maximum defrag force at 100% fragmentation */ createIntConfig("lfu-log-factor", NULL, MODIFIABLE_CONFIG, 0, INT_MAX, server.lfu_log_factor, 10, INTEGER_CONFIG, NULL, NULL), createIntConfig("lfu-decay-time", NULL, MODIFIABLE_CONFIG, 0, INT_MAX, server.lfu_decay_time, 1, INTEGER_CONFIG, NULL, NULL), createIntConfig("replica-priority", "slave-priority", MODIFIABLE_CONFIG, 0, INT_MAX, server.slave_priority, 100, INTEGER_CONFIG, NULL, NULL), createIntConfig("repl-diskless-sync-delay", NULL, MODIFIABLE_CONFIG, 0, INT_MAX, server.repl_diskless_sync_delay, 5, INTEGER_CONFIG, NULL, NULL), - createIntConfig("maxmemory-samples", NULL, MODIFIABLE_CONFIG, 1, INT_MAX, server.maxmemory_samples, 5, INTEGER_CONFIG, NULL, NULL), + createIntConfig("maxmemory-samples", NULL, MODIFIABLE_CONFIG, 1, 64, server.maxmemory_samples, 5, INTEGER_CONFIG, NULL, NULL), createIntConfig("maxmemory-eviction-tenacity", NULL, MODIFIABLE_CONFIG, 0, 100, server.maxmemory_eviction_tenacity, 10, INTEGER_CONFIG, NULL, NULL), createIntConfig("timeout", NULL, MODIFIABLE_CONFIG, 0, INT_MAX, server.maxidletime, 0, INTEGER_CONFIG, NULL, NULL), /* Default client timeout: infinite */ createIntConfig("replica-announce-port", "slave-announce-port", MODIFIABLE_CONFIG, 0, 65535, server.slave_announce_port, 0, INTEGER_CONFIG, NULL, NULL), @@ -3188,6 +3183,8 @@ standardConfig static_configs[] = { createUIntConfig("maxclients", NULL, MODIFIABLE_CONFIG, 1, UINT_MAX, server.maxclients, 10000, INTEGER_CONFIG, NULL, updateMaxclients), createUIntConfig("unixsocketperm", 
NULL, IMMUTABLE_CONFIG, 0, 0777, server.unixsocketperm, 0, OCTAL_CONFIG, NULL, NULL), createUIntConfig("socket-mark-id", NULL, IMMUTABLE_CONFIG, 0, UINT_MAX, server.socket_mark_id, 0, INTEGER_CONFIG, NULL, NULL), + createUIntConfig("max-new-connections-per-cycle", NULL, MODIFIABLE_CONFIG, 1, 1000, server.max_new_conns_per_cycle, 10, INTEGER_CONFIG, NULL, NULL), + createUIntConfig("max-new-tls-connections-per-cycle", NULL, MODIFIABLE_CONFIG, 1, 1000, server.max_new_tls_conns_per_cycle, 1, INTEGER_CONFIG, NULL, NULL), #ifdef LOG_REQ_RES createUIntConfig("client-default-resp", NULL, IMMUTABLE_CONFIG | HIDDEN_CONFIG, 2, 3, server.client_default_resp, 2, INTEGER_CONFIG, NULL, NULL), #endif @@ -3241,10 +3238,10 @@ standardConfig static_configs[] = { createBoolConfig("tls-session-caching", NULL, MODIFIABLE_CONFIG, server.tls_ctx_config.session_caching, 1, NULL, applyTlsCfg), createStringConfig("tls-cert-file", NULL, VOLATILE_CONFIG | MODIFIABLE_CONFIG, EMPTY_STRING_IS_NULL, server.tls_ctx_config.cert_file, NULL, NULL, applyTlsCfg), createStringConfig("tls-key-file", NULL, VOLATILE_CONFIG | MODIFIABLE_CONFIG, EMPTY_STRING_IS_NULL, server.tls_ctx_config.key_file, NULL, NULL, applyTlsCfg), - createStringConfig("tls-key-file-pass", NULL, MODIFIABLE_CONFIG, EMPTY_STRING_IS_NULL, server.tls_ctx_config.key_file_pass, NULL, NULL, applyTlsCfg), + createStringConfig("tls-key-file-pass", NULL, MODIFIABLE_CONFIG | SENSITIVE_CONFIG, EMPTY_STRING_IS_NULL, server.tls_ctx_config.key_file_pass, NULL, NULL, applyTlsCfg), createStringConfig("tls-client-cert-file", NULL, VOLATILE_CONFIG | MODIFIABLE_CONFIG, EMPTY_STRING_IS_NULL, server.tls_ctx_config.client_cert_file, NULL, NULL, applyTlsCfg), createStringConfig("tls-client-key-file", NULL, VOLATILE_CONFIG | MODIFIABLE_CONFIG, EMPTY_STRING_IS_NULL, server.tls_ctx_config.client_key_file, NULL, NULL, applyTlsCfg), - createStringConfig("tls-client-key-file-pass", NULL, MODIFIABLE_CONFIG, EMPTY_STRING_IS_NULL, 
server.tls_ctx_config.client_key_file_pass, NULL, NULL, applyTlsCfg), + createStringConfig("tls-client-key-file-pass", NULL, MODIFIABLE_CONFIG | SENSITIVE_CONFIG, EMPTY_STRING_IS_NULL, server.tls_ctx_config.client_key_file_pass, NULL, NULL, applyTlsCfg), createStringConfig("tls-dh-params-file", NULL, VOLATILE_CONFIG | MODIFIABLE_CONFIG, EMPTY_STRING_IS_NULL, server.tls_ctx_config.dh_params_file, NULL, NULL, applyTlsCfg), createStringConfig("tls-ca-cert-file", NULL, VOLATILE_CONFIG | MODIFIABLE_CONFIG, EMPTY_STRING_IS_NULL, server.tls_ctx_config.ca_cert_file, NULL, NULL, applyTlsCfg), createStringConfig("tls-ca-cert-dir", NULL, VOLATILE_CONFIG | MODIFIABLE_CONFIG, EMPTY_STRING_IS_NULL, server.tls_ctx_config.ca_cert_dir, NULL, NULL, applyTlsCfg), diff --git a/src/config.h b/src/config.h index 3c9a2701388..61393bd531c 100644 --- a/src/config.h +++ b/src/config.h @@ -1,30 +1,9 @@ /* - * Copyright (c) 2009-2012, Salvatore Sanfilippo + * Copyright (c) 2009-Present, Redis Ltd. * All rights reserved. * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Redis nor the names of its contributors may be used - * to endorse or promote products derived from this software without - * specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. + * Licensed under your choice of the Redis Source Available License 2.0 + * (RSALv2) or the Server Side Public License v1 (SSPLv1). */ #ifndef __CONFIG_H @@ -40,8 +19,12 @@ #include #endif +#if defined(__APPLE__) && defined(__MAC_OS_X_VERSION_MAX_ALLOWED) && __MAC_OS_X_VERSION_MAX_ALLOWED >= 1060 +#define MAC_OS_10_6_DETECTED +#endif + /* Define redis_fstat to fstat or fstat64() */ -#if defined(__APPLE__) && !defined(MAC_OS_X_VERSION_10_6) +#if defined(__APPLE__) && !defined(MAC_OS_10_6_DETECTED) #define redis_fstat fstat64 #define redis_stat stat64 #else @@ -92,11 +75,13 @@ #endif /* Test for accept4() */ -#ifdef __linux__ +#if defined(__linux__) || defined(OpenBSD5_7) || \ + (__FreeBSD__ >= 10 || __FreeBSD_version >= 1000000) || \ + (defined(NetBSD8_0) || __NetBSD_Version__ >= 800000000) #define HAVE_ACCEPT4 1 #endif -#if (defined(__APPLE__) && defined(MAC_OS_X_VERSION_10_6)) || defined(__FreeBSD__) || defined(__OpenBSD__) || defined (__NetBSD__) +#if (defined(__APPLE__) && defined(MAC_OS_10_6_DETECTED)) || defined(__FreeBSD__) || defined(__OpenBSD__) || defined (__NetBSD__) #define HAVE_KQUEUE 1 #endif @@ -293,7 +278,7 @@ void setproctitle(const char *fmt, ...); #include #define redis_set_thread_title(name) rename_thread(find_thread(0), name) #else -#if (defined __APPLE__ && defined(MAC_OS_X_VERSION_10_7)) +#if (defined __APPLE__ && defined(__MAC_OS_X_VERSION_MAX_ALLOWED) && __MAC_OS_X_VERSION_MAX_ALLOWED >= 1070) int 
pthread_setname_np(const char *name); #include #define redis_set_thread_title(name) pthread_setname_np(name) diff --git a/src/connection.h b/src/connection.h index d0a17ab4dd6..a8c296d156a 100644 --- a/src/connection.h +++ b/src/connection.h @@ -1,31 +1,10 @@ /* - * Copyright (c) 2019, Redis Labs + * Copyright (c) 2019-Present, Redis Ltd. * All rights reserved. * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Redis nor the names of its contributors may be used - * to endorse or promote products derived from this software without - * specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. + * Licensed under your choice of the Redis Source Available License 2.0 + * (RSALv2) or the Server Side Public License v1 (SSPLv1). 
*/ #ifndef __REDIS_CONNECTION_H @@ -40,7 +19,6 @@ #define CONN_INFO_LEN 32 #define CONN_ADDR_STR_LEN 128 /* Similar to INET6_ADDRSTRLEN, hoping to handle other protocols. */ -#define MAX_ACCEPTS_PER_CALL 1000 struct aeEventLoop; typedef struct connection connection; diff --git a/src/connhelpers.h b/src/connhelpers.h index b32e44dba06..79737fa8e9a 100644 --- a/src/connhelpers.h +++ b/src/connhelpers.h @@ -1,31 +1,10 @@ /* - * Copyright (c) 2019, Redis Labs + * Copyright (c) 2019-Present, Redis Ltd. * All rights reserved. * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Redis nor the names of its contributors may be used - * to endorse or promote products derived from this software without - * specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. + * Licensed under your choice of the Redis Source Available License 2.0 + * (RSALv2) or the Server Side Public License v1 (SSPLv1). */ #ifndef __REDIS_CONNHELPERS_H diff --git a/src/crc16.c b/src/crc16.c index 7b8c1dad0a1..d9e4f3f4997 100644 --- a/src/crc16.c +++ b/src/crc16.c @@ -2,7 +2,7 @@ /* * Copyright 2001-2010 Georges Menie (www.menie.org) - * Copyright 2010-2012 Salvatore Sanfilippo (adapted to Redis coding style) + * Copyright 2010-current Redis Ltd. * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without diff --git a/src/crc16_slottable.h b/src/crc16_slottable.h index 652aea9e1fb..f25e2412e89 100644 --- a/src/crc16_slottable.h +++ b/src/crc16_slottable.h @@ -7,8 +7,9 @@ * The array indexes are slot numbers, so that given a desired slot, this string is guaranteed * to make redis cluster route a request to the shard holding this slot */ +typedef char crc16_alphastring[4]; -const char *crc16_slot_table[] = { +const crc16_alphastring crc16_slot_table[] = { "06S", "Qi", "5L5", "4Iu", "4gY", "460", "1Y7", "1LV", "0QG", "ru", "7Ok", "4ji", "4DE", "65n", "2JH", "I8", "F9", "SX", "7nF", "4KD", "4eh", "6PK", "2ke", "1Ng", "0Sv", "4L", "491", "4hX", "4Ft", "5C4", "2Hy", "09R", "021", "0cX", "4Xv", "6mU", "6Cy", "42R", "0Mt", "nF", "cv", "1Pe", "5kK", "6NI", "74L", "4UF", "0nh", "MZ", "2TJ", "0ai", "4ZG", "6od", "6AH", "40c", "0OE", "lw", "aG", "0Bu", "5iz", "6Lx", diff --git a/src/db.c b/src/db.c index 4c8c0d287f4..4c5bbd88108 100644 --- a/src/db.c +++ b/src/db.c @@ -1,30 +1,9 @@ /* - * Copyright (c) 2009-2012, Salvatore Sanfilippo + * Copyright (c) 2009-Present, Redis Ltd. * All rights reserved. * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Redis nor the names of its contributors may be used - * to endorse or promote products derived from this software without - * specific prior written permission. 
- * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. + * Licensed under your choice of the Redis Source Available License 2.0 + * (RSALv2) or the Server Side Public License v1 (SSPLv1). */ #include "server.h" @@ -36,6 +15,7 @@ #include #include +#include "bio.h" /*----------------------------------------------------------------------------- * C-level DB API @@ -45,7 +25,14 @@ #define EXPIRE_FORCE_DELETE_EXPIRED 1 #define EXPIRE_AVOID_DELETE_EXPIRED 2 -int expireIfNeeded(redisDb *db, robj *key, int flags); +/* Return values for expireIfNeeded */ +typedef enum { + KEY_VALID = 0, /* Could be volatile and not yet expired, non-volatile, or even non-existing key. */ + KEY_EXPIRED, /* Logically expired but not yet deleted. */ + KEY_DELETED /* The key was deleted now. */ +} keyStatus; + +keyStatus expireIfNeeded(redisDb *db, robj *key, int flags); int keyIsExpired(redisDb *db, robj *key); static void dbSetValue(redisDb *db, robj *key, robj *val, int overwrite, dictEntry *de); @@ -86,7 +73,7 @@ void updateLFU(robj *val) { * expired on replicas even if the master is lagging expiring our key via DELs * in the replication link. 
*/ robj *lookupKey(redisDb *db, robj *key, int flags) { - dictEntry *de = dictFind(db->dict,key->ptr); + dictEntry *de = dbFind(db, key->ptr); robj *val = NULL; if (de) { val = dictGetVal(de); @@ -104,7 +91,7 @@ robj *lookupKey(redisDb *db, robj *key, int flags) { expire_flags |= EXPIRE_FORCE_DELETE_EXPIRED; if (flags & LOOKUP_NOEXPIRE) expire_flags |= EXPIRE_AVOID_DELETE_EXPIRED; - if (expireIfNeeded(db, key, expire_flags)) { + if (expireIfNeeded(db, key, expire_flags) != KEY_VALID) { /* The key is no longer valid. */ val = NULL; } @@ -188,26 +175,50 @@ robj *lookupKeyWriteOrReply(client *c, robj *key, robj *reply) { /* Add the key to the DB. It's up to the caller to increment the reference * counter of the value if needed. * - * If the update_if_existing argument is false, the the program is aborted - * if the key already exists, otherwise, it can fall back to dbOverwite. */ -static void dbAddInternal(redisDb *db, robj *key, robj *val, int update_if_existing) { + * If the update_if_existing argument is false, the program is aborted + * if the key already exists, otherwise, it can fall back to dbOverwrite. 
*/ +static dictEntry *dbAddInternal(redisDb *db, robj *key, robj *val, int update_if_existing) { dictEntry *existing; - dictEntry *de = dictAddRaw(db->dict, key->ptr, &existing); + int slot = getKeySlot(key->ptr); + dictEntry *de = kvstoreDictAddRaw(db->keys, slot, key->ptr, &existing); if (update_if_existing && existing) { dbSetValue(db, key, val, 1, existing); - return; + return existing; } serverAssertWithInfo(NULL, key, de != NULL); - dictSetKey(db->dict, de, sdsdup(key->ptr)); + kvstoreDictSetKey(db->keys, slot, de, sdsdup(key->ptr)); initObjectLRUOrLFU(val); - dictSetVal(db->dict, de, val); + kvstoreDictSetVal(db->keys, slot, de, val); signalKeyAsReady(db, key, val->type); - if (server.cluster_enabled) slotToKeyAddEntry(de, db); notifyKeyspaceEvent(NOTIFY_NEW,"new",key,db->id); + return de; } -void dbAdd(redisDb *db, robj *key, robj *val) { - dbAddInternal(db, key, val, 0); +dictEntry *dbAdd(redisDb *db, robj *key, robj *val) { + return dbAddInternal(db, key, val, 0); +} + +/* Returns key's hash slot when cluster mode is enabled, or 0 when disabled. + * The only difference between this function and getKeySlot, is that it's not using cached key slot from the current_client + * and always calculates CRC hash. + * This is useful when slot needs to be calculated for a key that user didn't request for, such as in case of eviction. */ +int calculateKeySlot(sds key) { + return server.cluster_enabled ? keyHashSlot(key, (int) sdslen(key)) : 0; +} + +/* Return slot-specific dictionary for key based on key's hash slot when cluster mode is enabled, else 0.*/ +int getKeySlot(sds key) { + /* This is performance optimization that uses pre-set slot id from the current command, + * in order to avoid calculation of the key hash. + * This optimization is only used when current_client flag `CLIENT_EXECUTING_COMMAND` is set. + * It only gets set during the execution of command under `call` method. Other flows requesting + * the key slot would fallback to calculateKeySlot. 
+ */ + if (server.current_client && server.current_client->slot >= 0 && server.current_client->flags & CLIENT_EXECUTING_COMMAND) { + debugServerAssertWithInfo(server.current_client, NULL, calculateKeySlot(key)==server.current_client->slot); + return server.current_client->slot; + } + return calculateKeySlot(key); } /* This is a special version of dbAdd() that is used only when loading @@ -222,11 +233,11 @@ void dbAdd(redisDb *db, robj *key, robj *val) { * ownership of the SDS string, otherwise 0 is returned, and is up to the * caller to free the SDS string. */ int dbAddRDBLoad(redisDb *db, sds key, robj *val) { - dictEntry *de = dictAddRaw(db->dict, key, NULL); + int slot = getKeySlot(key); + dictEntry *de = kvstoreDictAddRaw(db->keys, slot, key, NULL); if (de == NULL) return 0; initObjectLRUOrLFU(val); - dictSetVal(db->dict, de, val); - if (server.cluster_enabled) slotToKeyAddEntry(de, db); + kvstoreDictSetVal(db->keys, slot, de, val); return 1; } @@ -243,7 +254,8 @@ int dbAddRDBLoad(redisDb *db, sds key, robj *val) { * * The program is aborted if the key was not already present. 
*/ static void dbSetValue(redisDb *db, robj *key, robj *val, int overwrite, dictEntry *de) { - if (!de) de = dictFind(db->dict,key->ptr); + int slot = getKeySlot(key->ptr); + if (!de) de = kvstoreDictFind(db->keys, slot, key->ptr); serverAssertWithInfo(NULL,key,de != NULL); robj *old = dictGetVal(de); @@ -263,13 +275,16 @@ static void dbSetValue(redisDb *db, robj *key, robj *val, int overwrite, dictEnt /* Because of RM_StringDMA, old may be changed, so we need get old again */ old = dictGetVal(de); } - dictSetVal(db->dict, de, val); + kvstoreDictSetVal(db->keys, slot, de, val); + + /* if hash with HFEs, take care to remove from global HFE DS */ + if (old->type == OBJ_HASH) + hashTypeRemoveFromExpires(&db->hexpires, old); if (server.lazyfree_lazy_server_del) { freeObjAsync(key,old,db->id); } else { - /* This is just decrRefCount(old); */ - db->dict->type->valDestructor(db->dict, old); + decrRefCount(old); } } @@ -321,18 +336,18 @@ void setKey(client *c, redisDb *db, robj *key, robj *val, int flags) { robj *dbRandomKey(redisDb *db) { dictEntry *de; int maxtries = 100; - int allvolatile = dictSize(db->dict) == dictSize(db->expires); + int allvolatile = kvstoreSize(db->keys) == kvstoreSize(db->expires); while(1) { sds key; robj *keyobj; - - de = dictGetFairRandomKey(db->dict); + int randomSlot = kvstoreGetFairRandomDictIndex(db->keys); + de = kvstoreDictGetFairRandomKey(db->keys, randomSlot); if (de == NULL) return NULL; key = dictGetKey(de); keyobj = createStringObject(key,sdslen(key)); - if (dictFind(db->expires,key)) { + if (dbFindExpires(db, key)) { if (allvolatile && server.masterhost && --maxtries == 0) { /* If the DB is composed only of keys with an expire set, * it could happen that all the keys are already logically @@ -344,7 +359,7 @@ robj *dbRandomKey(redisDb *db) { * return a key name that may be already expired. 
*/ return keyobj; } - if (expireIfNeeded(db,keyobj,0)) { + if (expireIfNeeded(db,keyobj,0) != KEY_VALID) { decrRefCount(keyobj); continue; /* search for another key. This expired. */ } @@ -357,9 +372,15 @@ robj *dbRandomKey(redisDb *db) { int dbGenericDelete(redisDb *db, robj *key, int async, int flags) { dictEntry **plink; int table; - dictEntry *de = dictTwoPhaseUnlinkFind(db->dict,key->ptr,&plink,&table); + int slot = getKeySlot(key->ptr); + dictEntry *de = kvstoreDictTwoPhaseUnlinkFind(db->keys, slot, key->ptr, &plink, &table); if (de) { robj *val = dictGetVal(de); + + /* If hash object with expiry on fields, remove it from HFE DS of DB */ + if (val->type == OBJ_HASH) + hashTypeRemoveFromExpires(&db->hexpires, val); + /* RM_StringDMA may call dbUnshareStringValue which may free val, so we * need to incr to retain val */ incrRefCount(val); @@ -373,14 +394,13 @@ int dbGenericDelete(redisDb *db, robj *key, int async, int flags) { if (async) { /* Because of dbUnshareStringValue, the val in de may change. */ freeObjAsync(key, dictGetVal(de), db->id); - dictSetVal(db->dict, de, NULL); + kvstoreDictSetVal(db->keys, slot, de, NULL); } - if (server.cluster_enabled) slotToKeyDelEntry(de, db); - /* Deleting an entry from the expires dict will not free the sds of - * the key, because it is shared with the main dictionary. */ - if (dictSize(db->expires) > 0) dictDelete(db->expires,key->ptr); - dictTwoPhaseUnlinkFree(db->dict,de,plink,table); + * the key, because it is shared with the main dictionary. 
*/ + kvstoreDictDelete(db->expires, slot, key->ptr); + + kvstoreDictTwoPhaseUnlinkFree(db->keys, slot, de, plink, table); return 1; } else { return 0; @@ -462,12 +482,15 @@ long long emptyDbStructure(redisDb *dbarray, int dbnum, int async, } for (int j = startdb; j <= enddb; j++) { - removed += dictSize(dbarray[j].dict); + removed += kvstoreSize(dbarray[j].keys); if (async) { emptyDbAsync(&dbarray[j]); } else { - dictEmpty(dbarray[j].dict,callback); - dictEmpty(dbarray[j].expires,callback); + /* Destroy global HFE DS before deleting the hashes since ebuckets + * DS is embedded in the stored objects. */ + ebDestroy(&dbarray[j].hexpires, &hashExpireBucketsType, NULL); + kvstoreEmpty(dbarray[j].keys, callback); + kvstoreEmpty(dbarray[j].expires, callback); } /* Because all keys of database are removed, reset average ttl. */ dbarray[j].avg_ttl = 0; @@ -516,11 +539,6 @@ long long emptyData(int dbnum, int flags, void(callback)(dict*)) { /* Empty redis database structure. */ removed = emptyDbStructure(server.db, dbnum, async, callback); - /* Flush slots to keys map if enable cluster, we can flush entire - * slots to keys map whatever dbnum because only support one DB - * in cluster mode. */ - if (server.cluster_enabled) slotToKeyFlush(server.db); - if (dbnum == -1) flushSlaveKeysWithExpireList(); if (with_functions) { @@ -539,16 +557,18 @@ long long emptyData(int dbnum, int flags, void(callback)(dict*)) { /* Initialize temporary db on replica for use during diskless replication. */ redisDb *initTempDb(void) { + int slot_count_bits = 0; + int flags = KVSTORE_ALLOCATE_DICTS_ON_DEMAND; + if (server.cluster_enabled) { + slot_count_bits = CLUSTER_SLOT_MASK_BITS; + flags |= KVSTORE_FREE_EMPTY_DICTS; + } redisDb *tempDb = zcalloc(sizeof(redisDb)*server.dbnum); for (int i=0; ibstate.lazyfreeStartTime), 0); + /* lazyfree bg job always succeed */ + addReply(c, shared.ok); + + /* mark client as unblocked */ + unblockClient(c, 1); + + /* FLUSH command is finished. 
resetClient() and update replication offset. */ + commandProcessed(c); + + /* On flush completion, update the client's memory */ + updateClientMemUsageAndBucket(c); + + /* restore current_client */ + server.current_client = old_client; +} + +void flushCommandCommon(client *c, int isFlushAll) { + int blocking_async = 0; /* FLUSHALL\FLUSHDB SYNC opt to run as blocking ASYNC */ + int flags; if (getFlushCommandFlags(c,&flags) == C_ERR) return; - /* flushdb should not flush the functions */ - server.dirty += emptyData(c->db->id,flags | EMPTYDB_NOFUNCTIONS,NULL); - /* Without the forceCommandPropagation, when DB was already empty, - * FLUSHDB will not be replicated nor put into the AOF. */ + /* in case of SYNC, check if we can optimize and run it in bg as blocking ASYNC */ + if ((!(flags & EMPTYDB_ASYNC)) && (!(c->flags & CLIENT_AVOID_BLOCKING_ASYNC_FLUSH))) { + /* Run as ASYNC */ + flags |= EMPTYDB_ASYNC; + blocking_async = 1; + } + + if (isFlushAll) + flushAllDataAndResetRDB(flags | EMPTYDB_NOFUNCTIONS); + else + server.dirty += emptyData(c->db->id,flags | EMPTYDB_NOFUNCTIONS,NULL); + + /* Without the forceCommandPropagation, when DB(s) was already empty, + * FLUSHALL\FLUSHDB will not be replicated nor put into the AOF. */ forceCommandPropagation(c, PROPAGATE_REPL | PROPAGATE_AOF); - addReply(c,shared.ok); + /* if blocking ASYNC, block client and add completion job request to BIO lazyfree + * worker's queue. To be called and reply with OK only after all preceding pending + * lazyfree jobs in queue were processed */ + if (blocking_async) { + /* measure bg job till completion as elapsed time of flush command */ + elapsedStart(&c->bstate.lazyfreeStartTime); + c->bstate.timeout = 0; + blockClient(c,BLOCKED_LAZYFREE); + bioCreateCompRq(BIO_WORKER_LAZY_FREE, flushallSyncBgDone, c->id); + } else { + addReply(c, shared.ok); + } #if defined(USE_JEMALLOC) /* jemalloc 5 doesn't release pages back to the OS when there's no traffic. 
* for large databases, flushdb blocks for long anyway, so a bit more won't - * harm and this way the flush and purge will be synchronous. */ - if (!(flags & EMPTYDB_ASYNC)) + * harm and this way the flush and purge will be synchronous. + * + * Take care purge only FLUSHDB for sync flow. FLUSHALL sync flow already + * applied at flushAllDataAndResetRDB. Async flow will apply only later on */ + if ((!isFlushAll) && (!(flags & EMPTYDB_ASYNC))) { + /* Only clear the current thread cache. + * Ignore the return call since this will fail if the tcache is disabled. */ + je_mallctl("thread.tcache.flush", NULL, NULL, NULL, 0); + jemalloc_purge(); + } #endif } -/* FLUSHALL [ASYNC] +/* FLUSHALL [SYNC|ASYNC] * * Flushes the whole server data set. */ void flushallCommand(client *c) { - int flags; - if (getFlushCommandFlags(c,&flags) == C_ERR) return; - /* flushall should not flush the functions */ - flushAllDataAndResetRDB(flags | EMPTYDB_NOFUNCTIONS); - - /* Without the forceCommandPropagation, when DBs were already empty, - * FLUSHALL will not be replicated nor put into the AOF. */ - forceCommandPropagation(c, PROPAGATE_REPL | PROPAGATE_AOF); + flushCommandCommon(c, 1); +} - addReply(c,shared.ok); +/* FLUSHDB [SYNC|ASYNC] + * + * Flushes the currently SELECTed Redis DB. */ +void flushdbCommand(client *c) { + flushCommandCommon(c, 0); } /* This command implements DEL and UNLINK. */ @@ -719,7 +797,8 @@ void delGenericCommand(client *c, int lazy) { int numdel = 0, j; for (j = 1; j < c->argc; j++) { - expireIfNeeded(c->db,c->argv[j],0); + if (expireIfNeeded(c->db,c->argv[j],0) == KEY_DELETED) + continue; int deleted = lazy ? 
dbAsyncDelete(c->db,c->argv[j]) : dbSyncDelete(c->db,c->argv[j]); if (deleted) { @@ -783,17 +862,29 @@ void randomkeyCommand(client *c) { } void keysCommand(client *c) { - dictIterator *di; dictEntry *de; sds pattern = c->argv[1]->ptr; - int plen = sdslen(pattern), allkeys; + int plen = sdslen(pattern), allkeys, pslot = -1; unsigned long numkeys = 0; void *replylen = addReplyDeferredLen(c); - - di = dictGetSafeIterator(c->db->dict); allkeys = (pattern[0] == '*' && plen == 1); + if (server.cluster_enabled && !allkeys) { + pslot = patternHashSlot(pattern, plen); + } + kvstoreDictIterator *kvs_di = NULL; + kvstoreIterator *kvs_it = NULL; + if (pslot != -1) { + if (!kvstoreDictSize(c->db->keys, pslot)) { + /* Requested slot is empty */ + setDeferredArrayLen(c,replylen,0); + return; + } + kvs_di = kvstoreGetDictSafeIterator(c->db->keys, pslot); + } else { + kvs_it = kvstoreIteratorInit(c->db->keys); + } robj keyobj; - while((de = dictNext(di)) != NULL) { + while ((de = kvs_di ? kvstoreDictIteratorNext(kvs_di) : kvstoreIteratorNext(kvs_it)) != NULL) { sds key = dictGetKey(de); if (allkeys || stringmatchlen(pattern,plen,key,sdslen(key),0)) { @@ -806,7 +897,10 @@ void keysCommand(client *c) { if (c->flags & CLIENT_CLOSE_ASAP) break; } - dictReleaseIterator(di); + if (kvs_di) + kvstoreReleaseDictIterator(kvs_di); + if (kvs_it) + kvstoreIteratorRelease(kvs_it); setDeferredArrayLen(c,replylen,numkeys); } @@ -817,6 +911,8 @@ typedef struct { long long type; /* the particular type when scan the db */ sds pattern; /* pattern string, NULL means no pattern */ long sampled; /* cumulative number of keys sampled */ + int no_values; /* set to 1 means to return keys only */ + size_t (*strlen)(char *s); /* (o->type == OBJ_HASH) ? 
hfieldlen : sdslen */ } scanData; /* Helper function to compare key type in scan commands */ @@ -841,7 +937,7 @@ void scanCallback(void *privdata, const dictEntry *de) { list *keys = data->keys; robj *o = data->o; sds val = NULL; - sds key = NULL; + void *key = NULL; /* if OBJ_HASH then key is of type `hfield`. Otherwise, `sds` */ data->sampled++; /* o and typename can not have values at the same time. */ @@ -854,46 +950,44 @@ void scanCallback(void *privdata, const dictEntry *de) { } /* Filter element if it does not match the pattern. */ - sds keysds = dictGetKey(de); + void *keyStr = dictGetKey(de); if (data->pattern) { - if (!stringmatchlen(data->pattern, sdslen(data->pattern), keysds, sdslen(keysds), 0)) { + if (!stringmatchlen(data->pattern, sdslen(data->pattern), keyStr, data->strlen(keyStr), 0)) { return; } } if (o == NULL) { - key = keysds; + key = keyStr; } else if (o->type == OBJ_SET) { - key = keysds; + key = keyStr; } else if (o->type == OBJ_HASH) { - key = keysds; + key = keyStr; val = dictGetVal(de); + + /* If field is expired, then ignore */ + if (hfieldIsExpired(key)) + return; + } else if (o->type == OBJ_ZSET) { char buf[MAX_LONG_DOUBLE_CHARS]; int len = ld2string(buf, sizeof(buf), *(double *)dictGetVal(de), LD_STR_AUTO); - key = sdsdup(keysds); + key = sdsdup(keyStr); val = sdsnewlen(buf, len); } else { serverPanic("Type not handled in SCAN callback."); } listAddNodeTail(keys, key); - if (val) listAddNodeTail(keys, val); + if (val && !data->no_values) listAddNodeTail(keys, val); } /* Try to parse a SCAN cursor stored at object 'o': * if the cursor is valid, store it as unsigned integer into *cursor and * returns C_OK. Otherwise return C_ERR and send an error to the * client. */ -int parseScanCursorOrReply(client *c, robj *o, unsigned long *cursor) { - char *eptr; - - /* Use strtoul() because we need an *unsigned* long, so - * getLongLongFromObject() does not cover the whole cursor space. 
*/ - errno = 0; - *cursor = strtoul(o->ptr, &eptr, 10); - if (isspace(((char*)o->ptr)[0]) || eptr[0] != '\0' || errno == ERANGE) - { +int parseScanCursorOrReply(client *c, robj *o, unsigned long long *cursor) { + if (!string2ull(o->ptr, cursor)) { addReplyError(c, "invalid cursor"); return C_ERR; } @@ -951,14 +1045,15 @@ char *getObjectTypeName(robj *o) { * * In the case of a Hash object the function returns both the field and value * of every element on the Hash. */ -void scanGenericCommand(client *c, robj *o, unsigned long cursor) { +void scanGenericCommand(client *c, robj *o, unsigned long long cursor) { + int isKeysHfield = 0; int i, j; listNode *node; long count = 10; sds pat = NULL; sds typename = NULL; long long type = LLONG_MAX; - int patlen = 0, use_pattern = 0; + int patlen = 0, use_pattern = 0, no_values = 0; dict *ht; /* Object must be NULL (to iterate keys names), or the type of the object @@ -1003,6 +1098,13 @@ void scanGenericCommand(client *c, robj *o, unsigned long cursor) { return; } i+= 2; + } else if (!strcasecmp(c->argv[i]->ptr, "novalues")) { + if (!o || o->type != OBJ_HASH) { + addReplyError(c, "NOVALUES option can only be used in HSCAN"); + return; + } + no_values = 1; + i++; } else { addReplyErrorObject(c,shared.syntaxerr); return; @@ -1020,10 +1122,11 @@ void scanGenericCommand(client *c, robj *o, unsigned long cursor) { /* Handle the case of a hash table. */ ht = NULL; if (o == NULL) { - ht = c->db->dict; + ht = NULL; } else if (o->type == OBJ_SET && o->encoding == OBJ_ENCODING_HT) { ht = o->ptr; } else if (o->type == OBJ_HASH && o->encoding == OBJ_ENCODING_HT) { + isKeysHfield = 1; ht = o->ptr; } else if (o->type == OBJ_ZSET && o->encoding == OBJ_ENCODING_SKIPLIST) { zset *zs = o->ptr; @@ -1043,7 +1146,8 @@ void scanGenericCommand(client *c, robj *o, unsigned long cursor) { listSetFreeMethod(keys, (void (*)(void*))sdsfree); } - if (ht) { + /* For main dictionary scan or data structure using hashtable. 
*/ + if (!o || ht) { /* We set the max number of iterations to ten times the specified * COUNT, so if the hash table is in a pathological state (very * sparsely populated) we avoid to block too much time at the cost @@ -1056,20 +1160,36 @@ void scanGenericCommand(client *c, robj *o, unsigned long cursor) { * it is possible to fetch more data in a type-dependent way; * 3. data.type: the specified type scan in the db, LLONG_MAX means * type matching is no needed; - * 4. data.pattern: the pattern string + * 4. data.pattern: the pattern string; * 5. data.sampled: the maxiteration limit is there in case we're * working on an empty dict, one with a lot of empty buckets, and * for the buckets are not empty, we need to limit the spampled number - * to prevent a long hang time caused by filtering too many keys*/ + * to prevent a long hang time caused by filtering too many keys; + * 6. data.no_values: to control whether values will be returned or + * only keys are returned. */ scanData data = { .keys = keys, .o = o, .type = type, .pattern = use_pattern ? pat : NULL, .sampled = 0, + .no_values = no_values, + .strlen = (isKeysHfield) ? hfieldlen : sdslen, }; + + /* A pattern may restrict all matching keys to one cluster slot. */ + int onlydidx = -1; + if (o == NULL && use_pattern && server.cluster_enabled) { + onlydidx = patternHashSlot(pat, patlen); + } do { - cursor = dictScan(ht, cursor, scanCallback, &data); + /* In cluster mode there is a separate dictionary for each slot. + * If cursor is empty, we should try exploring next non-empty slot. 
*/ + if (o == NULL) { + cursor = kvstoreScan(c->db->keys, cursor, onlydidx, scanCallback, NULL, &data); + } else { + cursor = dictScan(ht, cursor, scanCallback, &data); + } } while (cursor && maxiterations-- && data.sampled < count); } else if (o->type == OBJ_SET) { char *str; @@ -1109,9 +1229,45 @@ void scanGenericCommand(client *c, robj *o, unsigned long cursor) { /* add key object */ listAddNodeTail(keys, sdsnewlen(str, len)); /* add value object */ + if (!no_values) { + str = lpGet(p, &len, intbuf); + listAddNodeTail(keys, sdsnewlen(str, len)); + } + p = lpNext(o->ptr, p); + } + cursor = 0; + } else if (o->type == OBJ_HASH && o->encoding == OBJ_ENCODING_LISTPACK_EX) { + int64_t len; + long long expire_at; + unsigned char *lp = hashTypeListpackGetLp(o); + unsigned char *p = lpFirst(lp); + unsigned char *str, *val; + unsigned char intbuf[LP_INTBUF_SIZE]; + + while (p) { str = lpGet(p, &len, intbuf); + p = lpNext(lp, p); + val = p; /* Keep pointer to value */ + + p = lpNext(lp, p); + serverAssert(p && lpGetIntegerValue(p, &expire_at)); + + if (hashTypeIsExpired(o, expire_at) || + (use_pattern && !stringmatchlen(pat, sdslen(pat), (char *)str, len, 0))) + { + /* jump to the next key/val pair */ + p = lpNext(lp, p); + continue; + } + + /* add key object */ listAddNodeTail(keys, sdsnewlen(str, len)); - p = lpNext(o->ptr, p); + /* add value object */ + if (!no_values) { + str = lpGet(val, &len, intbuf); + listAddNodeTail(keys, sdsnewlen(str, len)); + } + p = lpNext(lp, p); } cursor = 0; } else { @@ -1137,10 +1293,14 @@ void scanGenericCommand(client *c, robj *o, unsigned long cursor) { addReplyArrayLen(c, 2); addReplyBulkLongLong(c,cursor); + unsigned long long idx = 0; addReplyArrayLen(c, listLength(keys)); while ((node = listFirst(keys)) != NULL) { - sds key = listNodeValue(node); - addReplyBulkCBuffer(c, key, sdslen(key)); + void *key = listNodeValue(node); + /* For HSCAN, list will contain keys value pairs unless no_values arg + * was given. 
We should call mstrlen for the keys only. */ + int hfieldkey = isKeysHfield && (no_values || (idx++ % 2 == 0)); + addReplyBulkCBuffer(c, key, hfieldkey ? mstrlen(key) : sdslen(key)); listDelNode(keys, node); } @@ -1149,13 +1309,13 @@ void scanGenericCommand(client *c, robj *o, unsigned long cursor) { /* The SCAN command completely relies on scanGenericCommand. */ void scanCommand(client *c) { - unsigned long cursor; + unsigned long long cursor; if (parseScanCursorOrReply(c,c->argv[1],&cursor) == C_ERR) return; scanGenericCommand(c,NULL,cursor); } void dbsizeCommand(client *c) { - addReplyLongLong(c,dictSize(c->db->dict)); + addReplyLongLong(c,kvstoreSize(c->db->keys)); } void lastsaveCommand(client *c) { @@ -1233,6 +1393,7 @@ void renameGenericCommand(client *c, int nx) { robj *o; long long expire; int samekey = 0; + uint64_t minHashExpireTime = EB_EXPIRE_TIME_INVALID; /* When source and dest key is the same, no operation is performed, * if the key exists, however we still return an error on unexisting key. */ @@ -1258,9 +1419,21 @@ void renameGenericCommand(client *c, int nx) { * with the same name. */ dbDelete(c->db,c->argv[2]); } - dbAdd(c->db,c->argv[2],o); + dictEntry *de = dbAdd(c->db, c->argv[2], o); if (expire != -1) setExpire(c,c->db,c->argv[2],expire); + + /* If hash with expiration on fields then remove it from global HFE DS and + * keep next expiration time. Otherwise, dbDelete() will remove it from the + * global HFE DS and we will lose the expiration time. 
*/ + if (o->type == OBJ_HASH) + minHashExpireTime = hashTypeRemoveFromExpires(&c->db->hexpires, o); + dbDelete(c->db,c->argv[1]); + + /* If hash with HFEs, register in db->hexpires */ + if (minHashExpireTime != EB_EXPIRE_TIME_INVALID) + hashTypeAddToExpires(c->db, dictGetKey(de), o, minHashExpireTime); + signalModifiedKey(c,c->db,c->argv[1]); signalModifiedKey(c,c->db,c->argv[2]); notifyKeyspaceEvent(NOTIFY_GENERIC,"rename_from", @@ -1284,6 +1457,7 @@ void moveCommand(client *c) { redisDb *src, *dst; int srcid, dbid; long long expire; + uint64_t hashExpireTime = EB_EXPIRE_TIME_INVALID; if (server.cluster_enabled) { addReplyError(c,"MOVE is not allowed in cluster mode"); @@ -1324,12 +1498,25 @@ void moveCommand(client *c) { addReply(c,shared.czero); return; } - dbAdd(dst,c->argv[1],o); + dictEntry *dstDictEntry = dbAdd(dst,c->argv[1],o); if (expire != -1) setExpire(c,dst,c->argv[1],expire); + + /* If hash with expiration on fields, remove it from global HFE DS and keep + * aside registered expiration time. Must be before deletion of the object. + * hexpires (ebuckets) embed in stored items its structure. */ + if (o->type == OBJ_HASH) + hashExpireTime = hashTypeRemoveFromExpires(&src->hexpires, o); + incrRefCount(o); /* OK! key moved, free the entry in the source DB */ dbDelete(src,c->argv[1]); + + /* If object of type hash with expiration on fields. Taken care to add the + * hash to hexpires of `dst` only after dbDelete(). */ + if (hashExpireTime != EB_EXPIRE_TIME_INVALID) + hashTypeAddToExpires(dst, dictGetKey(dstDictEntry), o, hashExpireTime); + signalModifiedKey(c,src,c->argv[1]); signalModifiedKey(c,dst,c->argv[1]); notifyKeyspaceEvent(NOTIFY_GENERIC, @@ -1412,12 +1599,13 @@ void copyCommand(client *c) { /* Duplicate object according to object's type. 
*/ robj *newobj; + uint64_t minHashExpire = EB_EXPIRE_TIME_INVALID; /* HFE feature */ switch(o->type) { case OBJ_STRING: newobj = dupStringObject(o); break; case OBJ_LIST: newobj = listTypeDup(o); break; case OBJ_SET: newobj = setTypeDup(o); break; case OBJ_ZSET: newobj = zsetDup(o); break; - case OBJ_HASH: newobj = hashTypeDup(o); break; + case OBJ_HASH: newobj = hashTypeDup(o, newkey->ptr, &minHashExpire); break; case OBJ_STREAM: newobj = streamDup(o); break; case OBJ_MODULE: newobj = moduleTypeDupOrReply(c, key, newkey, dst->id, o); @@ -1432,8 +1620,16 @@ void copyCommand(client *c) { dbDelete(dst,newkey); } - dbAdd(dst,newkey,newobj); - if (expire != -1) setExpire(c, dst, newkey, expire); + dictEntry *deCopy = dbAdd(dst,newkey,newobj); + + /* if key with expiration then set it */ + if (expire != -1) + setExpire(c, dst, newkey, expire); + + /* If minExpiredField was set, then the object is hash with expiration + * on fields and need to register it in global HFE DS */ + if (minHashExpire != EB_EXPIRE_TIME_INVALID) + hashTypeAddToExpires(dst, dictGetKey(deCopy), newobj, minHashExpire); /* OK! 
key copied */ signalModifiedKey(c,dst,c->argv[2]); @@ -1452,7 +1648,7 @@ void scanDatabaseForReadyKeys(redisDb *db) { dictIterator *di = dictGetSafeIterator(db->blocking_keys); while((de = dictNext(di)) != NULL) { robj *key = dictGetKey(de); - dictEntry *kde = dictFind(db->dict,key->ptr); + dictEntry *kde = dbFind(db, key->ptr); if (kde) { robj *value = dictGetVal(kde); signalKeyAsReady(db, key, value->type); @@ -1472,7 +1668,7 @@ void scanDatabaseForDeletedKeys(redisDb *emptied, redisDb *replaced_with) { int existed = 0, exists = 0; int original_type = -1, curr_type = -1; - dictEntry *kde = dictFind(emptied->dict, key->ptr); + dictEntry *kde = dbFind(emptied, key->ptr); if (kde) { robj *value = dictGetVal(kde); original_type = value->type; @@ -1480,7 +1676,7 @@ void scanDatabaseForDeletedKeys(redisDb *emptied, redisDb *replaced_with) { } if (replaced_with) { - dictEntry *kde = dictFind(replaced_with->dict, key->ptr); + kde = dbFind(replaced_with, key->ptr); if (kde) { robj *value = dictGetVal(kde); curr_type = value->type; @@ -1521,13 +1717,15 @@ int dbSwapDatabases(int id1, int id2) { /* Swap hash tables. Note that we don't swap blocking_keys, * ready_keys and watched_keys, since we want clients to * remain in the same DB they were. */ - db1->dict = db2->dict; + db1->keys = db2->keys; db1->expires = db2->expires; + db1->hexpires = db2->hexpires; db1->avg_ttl = db2->avg_ttl; db1->expires_cursor = db2->expires_cursor; - db2->dict = aux.dict; + db2->keys = aux.keys; db2->expires = aux.expires; + db2->hexpires = aux.hexpires; db2->avg_ttl = aux.avg_ttl; db2->expires_cursor = aux.expires_cursor; @@ -1549,13 +1747,6 @@ int dbSwapDatabases(int id1, int id2) { * database (temp) as the main (active) database, the actual freeing of old database * (which will now be placed in the temp one) is done later. */ void swapMainDbWithTempDb(redisDb *tempDb) { - if (server.cluster_enabled) { - /* Swap slots_to_keys from tempdb just loaded with main db slots_to_keys. 
*/ - clusterSlotToKeyMapping *aux = server.db->slots_to_keys; - server.db->slots_to_keys = tempDb->slots_to_keys; - tempDb->slots_to_keys = aux; - } - for (int i=0; idict = newdb->dict; + activedb->keys = newdb->keys; activedb->expires = newdb->expires; + activedb->hexpires = newdb->hexpires; activedb->avg_ttl = newdb->avg_ttl; activedb->expires_cursor = newdb->expires_cursor; - newdb->dict = aux.dict; + newdb->keys = aux.keys; newdb->expires = aux.expires; + newdb->hexpires = aux.hexpires; newdb->avg_ttl = aux.avg_ttl; newdb->expires_cursor = aux.expires_cursor; @@ -1632,7 +1825,7 @@ void swapdbCommand(client *c) { *----------------------------------------------------------------------------*/ int removeExpire(redisDb *db, robj *key) { - return dictDelete(db->expires,key->ptr) == DICT_OK; + return kvstoreDictDelete(db->expires, getKeySlot(key->ptr), key->ptr) == DICT_OK; } /* Set an expire to the specified key. If the expire is set in the context @@ -1640,13 +1833,18 @@ int removeExpire(redisDb *db, robj *key) { * to NULL. The 'when' parameter is the absolute unix time in milliseconds * after which the key will no longer be considered valid. 
*/ void setExpire(client *c, redisDb *db, robj *key, long long when) { - dictEntry *kde, *de; + dictEntry *kde, *de, *existing; /* Reuse the sds from the main dict in the expire dict */ - kde = dictFind(db->dict,key->ptr); + int slot = getKeySlot(key->ptr); + kde = kvstoreDictFind(db->keys, slot, key->ptr); serverAssertWithInfo(NULL,key,kde != NULL); - de = dictAddOrFind(db->expires,dictGetKey(kde)); - dictSetSignedIntegerVal(de,when); + de = kvstoreDictAddRaw(db->expires, slot, dictGetKey(kde), &existing); + if (existing) { + dictSetSignedIntegerVal(existing, when); + } else { + dictSetSignedIntegerVal(de, when); + } int writable_slave = server.masterhost && server.repl_slave_ro == 0; if (c && writable_slave && !(c->flags & CLIENT_MASTER)) @@ -1658,9 +1856,8 @@ void setExpire(client *c, redisDb *db, robj *key, long long when) { long long getExpire(redisDb *db, robj *key) { dictEntry *de; - /* No expire? return ASAP */ - if (dictSize(db->expires) == 0 || - (de = dictFind(db->expires,key->ptr)) == NULL) return -1; + if ((de = dbFindExpires(db, key->ptr)) == NULL) + return -1; return dictGetSignedIntegerVal(de); } @@ -1678,23 +1875,24 @@ void deleteExpiredKeyAndPropagate(redisDb *db, robj *keyobj) { server.stat_expiredkeys++; } -/* Propagate expires into slaves and the AOF file. - * When a key expires in the master, a DEL operation for this key is sent - * to all the slaves and the AOF file if enabled. +/* Propagate an implicit key deletion into replicas and the AOF file. + * When a key was deleted in the master by eviction, expiration or a similar + * mechanism a DEL/UNLINK operation for this key is sent + * to all the replicas and the AOF file if enabled. 
* - * This way the key expiry is centralized in one place, and since both - * AOF and the master->slave link guarantee operation ordering, everything - * will be consistent even if we allow write operations against expiring + * This way the key deletion is centralized in one place, and since both + * AOF and the replication link guarantee operation ordering, everything + * will be consistent even if we allow write operations against deleted * keys. * * This function may be called from: * 1. Within call(): Example: Lazy-expire on key access. * In this case the caller doesn't have to do anything * because call() handles server.also_propagate(); or - * 2. Outside of call(): Example: Active-expire, eviction. + * 2. Outside of call(): Example: Active-expire, eviction, slot ownership changed. * In this the caller must remember to call * postExecutionUnitOperations, preferably just after a - * single deletion batch, so that DELs will NOT be wrapped + * single deletion batch, so that DEL/UNLINK will NOT be wrapped * in MULTI/EXEC */ void propagateDeletion(redisDb *db, robj *key, int lazy) { robj *argv[2]; @@ -1704,7 +1902,7 @@ void propagateDeletion(redisDb *db, robj *key, int lazy) { incrRefCount(argv[0]); incrRefCount(argv[1]); - /* If the master decided to expire a key we must propagate it to replicas no matter what.. + /* If the master decided to delete a key we must propagate it to replicas no matter what. * Even if module executed a command without asking for propagation. */ int prev_replication_allowed = server.replication_allowed; server.replication_allowed = 1; @@ -1750,7 +1948,7 @@ int keyIsExpired(redisDb *db, robj *key) { * propagation of a DEL/UNLINK command in AOF / replication stream. * * On replicas, this function does not delete expired keys by default, but - * it still returns 1 if the key is logically expired. To force deletion + * it still returns KEY_EXPIRED if the key is logically expired. 
To force deletion * of logically expired keys even on replicas, use the EXPIRE_FORCE_DELETE_EXPIRED * flag. Note though that if the current client is executing * replicated commands from the master, keys are never considered expired. @@ -1759,11 +1957,12 @@ int keyIsExpired(redisDb *db, robj *key) { * the actual key deletion and propagation of the deletion, use the * EXPIRE_AVOID_DELETE_EXPIRED flag. * - * The return value of the function is 0 if the key is still valid, - * otherwise the function returns 1 if the key is expired. */ -int expireIfNeeded(redisDb *db, robj *key, int flags) { - if (server.lazy_expire_disabled) return 0; - if (!keyIsExpired(db,key)) return 0; + * The return value of the function is KEY_VALID if the key is still valid. + * The function returns KEY_EXPIRED if the key is expired BUT not deleted, + * or returns KEY_DELETED if the key is expired and deleted. */ +keyStatus expireIfNeeded(redisDb *db, robj *key, int flags) { + if (server.lazy_expire_disabled) return KEY_VALID; + if (!keyIsExpired(db,key)) return KEY_VALID; /* If we are running in the context of a replica, instead of * evicting the expired key from the database, we return ASAP: @@ -1773,25 +1972,25 @@ int expireIfNeeded(redisDb *db, robj *key, int flags) { * replicas. * * Still we try to return the right information to the caller, - * that is, 0 if we think the key should be still valid, 1 if - * we think the key is expired at this time. + * that is, KEY_VALID if we think the key should still be valid, + * KEY_EXPIRED if we think the key is expired but don't want to delete it at this time. * * When replicating commands from the master, keys are never considered * expired. 
*/ if (server.masterhost != NULL) { - if (server.current_client && (server.current_client->flags & CLIENT_MASTER)) return 0; - if (!(flags & EXPIRE_FORCE_DELETE_EXPIRED)) return 1; + if (server.current_client && (server.current_client->flags & CLIENT_MASTER)) return KEY_VALID; + if (!(flags & EXPIRE_FORCE_DELETE_EXPIRED)) return KEY_EXPIRED; } /* In some cases we're explicitly instructed to return an indication of a * missing key without actually deleting it, even on masters. */ if (flags & EXPIRE_AVOID_DELETE_EXPIRED) - return 1; + return KEY_EXPIRED; /* If 'expire' action is paused, for whatever reason, then don't expire any key. * Typically, at the end of the pause we will properly expire the key OR we * will have failed over and the new primary will send us the expire. */ - if (isPausedActionsWithUpdate(PAUSE_ACTION_EXPIRE)) return 1; + if (isPausedActionsWithUpdate(PAUSE_ACTION_EXPIRE)) return KEY_EXPIRED; /* The key needs to be converted from static to heap before deleted */ int static_key = key->refcount == OBJ_STATIC_REFCOUNT; @@ -1803,7 +2002,68 @@ int expireIfNeeded(redisDb *db, robj *key, int flags) { if (static_key) { decrRefCount(key); } - return 1; + return KEY_DELETED; +} + +/* CB passed to kvstoreExpand. + * The purpose is to skip expansion of unused dicts in cluster mode (all + * dicts not mapped to *my* slots) */ +static int dbExpandSkipSlot(int slot) { + return !clusterNodeCoversSlot(getMyClusterNode(), slot); +} + +/* + * This functions increases size of the main/expires db to match desired number. + * In cluster mode resizes all individual dictionaries for slots that this node owns. + * + * Based on the parameter `try_expand`, appropriate dict expand API is invoked. + * if try_expand is set to 1, `dictTryExpand` is used else `dictExpand`. + * The return code is either `DICT_OK`/`DICT_ERR` for both the API(s). + * `DICT_OK` response is for successful expansion. 
However, `DICT_ERR` response signifies failure in allocation in + `dictTryExpand` call and in case of `dictExpand` call it signifies no expansion was performed. */ +static int dbExpandGeneric(kvstore *kvs, uint64_t db_size, int try_expand) { + int ret; + if (server.cluster_enabled) { + /* We don't know exact number of keys that would fall into each slot, but we can + * approximate it, assuming even distribution, divide it by the number of slots. */ + int slots = getMyShardSlotCount(); + if (slots == 0) return C_OK; + db_size = db_size / slots; + ret = kvstoreExpand(kvs, db_size, try_expand, dbExpandSkipSlot); + } else { + ret = kvstoreExpand(kvs, db_size, try_expand, NULL); + } + + return ret? C_OK : C_ERR; +} + +int dbExpand(redisDb *db, uint64_t db_size, int try_expand) { + return dbExpandGeneric(db->keys, db_size, try_expand); +} + +int dbExpandExpires(redisDb *db, uint64_t db_size, int try_expand) { + return dbExpandGeneric(db->expires, db_size, try_expand); +} + +static dictEntry *dbFindGeneric(kvstore *kvs, void *key) { + return kvstoreDictFind(kvs, getKeySlot(key), key); +} + +dictEntry *dbFind(redisDb *db, void *key) { + return dbFindGeneric(db->keys, key); +} + +dictEntry *dbFindExpires(redisDb *db, void *key) { + return dbFindGeneric(db->expires, key); +} + +unsigned long long dbSize(redisDb *db) { + return kvstoreSize(db->keys); +} + +unsigned long long dbScan(redisDb *db, unsigned long long cursor, dictScanFunction *scan_cb, void *privdata) { + return kvstoreScan(db->keys, cursor, -1, scan_cb, NULL, privdata); }
diff --git a/src/debug.c b/src/debug.c index a57b1fde9a3..b774ccc656b 100644 --- a/src/debug.c +++ b/src/debug.c @@ -1,31 +1,9 @@ /* - * Copyright (c) 2009-2020, Salvatore Sanfilippo - * Copyright (c) 2020, Redis Labs, Inc + * Copyright (c) 2009-Present, Redis Ltd. * All rights reserved. * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Redis nor the names of its contributors may be used - * to endorse or promote products derived from this software without - * specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. + * Licensed under your choice of the Redis Source Available License 2.0 + * (RSALv2) or the Server Side Public License v1 (SSPLv1). 
*/ #include "server.h" @@ -36,6 +14,8 @@ #include "quicklist.h" #include "fpconv_dtoa.h" #include "cluster.h" +#include "threads_mngr.h" +#include "script.h" #include #include @@ -66,12 +46,16 @@ typedef ucontext_t sigcontext_t; /* Globals */ static int bug_report_start = 0; /* True if bug report header was already logged. */ static pthread_mutex_t bug_report_start_mutex = PTHREAD_MUTEX_INITIALIZER; - +/* Mutex for a case when two threads crash at the same time. */ +static pthread_mutex_t signal_handler_lock; +static pthread_mutexattr_t signal_handler_lock_attr; +static volatile int signal_handler_lock_initialized = 0; /* Forward declarations */ -void bugReportStart(void); +int bugReportStart(void); void printCrashReport(void); void bugReportEnd(int killViaSignal, int sig); -void logStackTrace(void *eip, int uplevel); +void logStackTrace(void *eip, int uplevel, int current_thread); +void sigalrmSignalHandler(int sig, siginfo_t *info, void *secret); /* ================================= Debugging ============================== */ @@ -217,17 +201,22 @@ void xorObjectDigest(redisDb *db, robj *keyobj, unsigned char *digest, robj *o) } } else if (o->type == OBJ_HASH) { hashTypeIterator *hi = hashTypeInitIterator(o); - while (hashTypeNext(hi) != C_ERR) { + while (hashTypeNext(hi, 0) != C_ERR) { unsigned char eledigest[20]; sds sdsele; + /* field */ memset(eledigest,0,20); sdsele = hashTypeCurrentObjectNewSds(hi,OBJ_HASH_KEY); mixDigest(eledigest,sdsele,sdslen(sdsele)); sdsfree(sdsele); + /* val */ sdsele = hashTypeCurrentObjectNewSds(hi,OBJ_HASH_VALUE); mixDigest(eledigest,sdsele,sdslen(sdsele)); sdsfree(sdsele); + /* hash-field expiration (HFE) */ + if (hi->expire_time != EB_EXPIRE_TIME_INVALID) + xorDigest(eledigest,"!!hexpire!!",11); xorDigest(digest,eledigest,20); } hashTypeReleaseIterator(hi); @@ -276,7 +265,6 @@ void xorObjectDigest(redisDb *db, robj *keyobj, unsigned char *digest, robj *o) * a different digest. 
*/ void computeDatasetDigest(unsigned char *final) { unsigned char digest[20]; - dictIterator *di = NULL; dictEntry *de; int j; uint32_t aux; @@ -285,17 +273,16 @@ void computeDatasetDigest(unsigned char *final) { for (j = 0; j < server.dbnum; j++) { redisDb *db = server.db+j; + if (kvstoreSize(db->keys) == 0) + continue; + kvstoreIterator *kvs_it = kvstoreIteratorInit(db->keys); - if (dictSize(db->dict) == 0) continue; - di = dictGetSafeIterator(db->dict); - - /* hash the DB id, so the same dataset moved in a different - * DB will lead to a different digest */ + /* hash the DB id, so the same dataset moved in a different DB will lead to a different digest */ aux = htonl(j); mixDigest(final,&aux,sizeof(aux)); /* Iterate this DB writing every entry */ - while((de = dictNext(di)) != NULL) { + while((de = kvstoreIteratorNext(kvs_it)) != NULL) { sds key; robj *keyobj, *o; @@ -312,7 +299,7 @@ void computeDatasetDigest(unsigned char *final) { xorDigest(final,digest,20); decrRefCount(keyobj); } - dictReleaseIterator(di); + kvstoreIteratorRelease(kvs_it); } } @@ -464,9 +451,9 @@ void debugCommand(client *c) { "SEGFAULT", " Crash the server with sigsegv.", "SET-ACTIVE-EXPIRE <0|1>", -" Setting it to 0 disables expiring keys in background when they are not", -" accessed (otherwise the Redis behavior). Setting it to 1 reenables back the", -" default.", +" Setting it to 0 disables expiring keys (and hash-fields) in background ", +" when they are not accessed (otherwise the Redis behavior). Setting it", +" to 1 reenables back the default.", "QUICKLIST-PACKED-THRESHOLD ", " Sets the threshold for elements to be inserted as plain vs packed nodes", " Default value is 1GB, allows values up to 4GB. 
Setting to 0 restores to default.", @@ -493,11 +480,11 @@ void debugCommand(client *c) { " In case RESET is provided the peak reset time will be restored to the default value", "REPLYBUFFER RESIZING <0|1>", " Enable or disable the reply buffer resize cron job", -"CLUSTERLINK KILL ", -" Kills the link based on the direction to/from (both) with the provided node." , +"DICT-RESIZING <0|1>", +" Enable or disable the main dict and expire dict resizing.", NULL }; - addReplyHelp(c, help); + addExtendedReplyHelp(c, help, clusterDebugCommandExtendedHelp()); } else if (!strcasecmp(c->argv[1]->ptr,"segfault")) { /* Compiler gives warnings about writing to a random address * e.g "*((char*)-1) = 'x';". As a workaround, we map a read-only area @@ -605,7 +592,7 @@ NULL robj *val; char *strenc; - if ((de = dictFind(c->db->dict,c->argv[2]->ptr)) == NULL) { + if ((de = dbFind(c->db, c->argv[2]->ptr)) == NULL) { addReplyErrorObject(c,shared.nokeyerr); return; } @@ -657,7 +644,7 @@ NULL robj *val; sds key; - if ((de = dictFind(c->db->dict,c->argv[2]->ptr)) == NULL) { + if ((de = dbFind(c->db, c->argv[2]->ptr)) == NULL) { addReplyErrorObject(c,shared.nokeyerr); return; } @@ -683,10 +670,14 @@ NULL if ((o = objectCommandLookupOrReply(c,c->argv[2],shared.nokeyerr)) == NULL) return; - if (o->encoding != OBJ_ENCODING_LISTPACK) { + if (o->encoding != OBJ_ENCODING_LISTPACK && o->encoding != OBJ_ENCODING_LISTPACK_EX) { addReplyError(c,"Not a listpack encoded object."); } else { - lpRepr(o->ptr); + if (o->encoding == OBJ_ENCODING_LISTPACK) + lpRepr(o->ptr); + else if (o->encoding == OBJ_ENCODING_LISTPACK_EX) + lpRepr(((listpackEx*)o->ptr)->lp); + addReplyStatus(c,"Listpack structure printed on stdout"); } } else if (!strcasecmp(c->argv[1]->ptr,"quicklist") && (c->argc == 3 || c->argc == 4)) { @@ -713,7 +704,12 @@ NULL if (getPositiveLongFromObjectOrReply(c, c->argv[2], &keys, NULL) != C_OK) return; - if (dictTryExpand(c->db->dict, keys) != DICT_OK) { + if (server.loading || 
server.async_loading) { + addReplyErrorObject(c, shared.loadingerr); + return; + } + + if (dbExpand(c->db, keys, 1) == C_ERR) { addReplyError(c, "OOM in dictTryExpand"); return; } @@ -761,7 +757,7 @@ NULL /* We don't use lookupKey because a debug command should * work on logically expired keys */ dictEntry *de; - robj *o = ((de = dictFind(c->db->dict,c->argv[j]->ptr)) == NULL) ? NULL : dictGetVal(de); + robj *o = ((de = dbFind(c->db, c->argv[j]->ptr)) == NULL) ? NULL : dictGetVal(de); if (o) xorObjectDigest(c->db,c->argv[j],digest,o); sds d = sdsempty(); @@ -849,7 +845,7 @@ NULL { int memerr; unsigned long long sz = memtoull((const char *)c->argv[2]->ptr, &memerr); - if (memerr || !quicklistisSetPackedThreshold(sz)) { + if (memerr || !quicklistSetPackedThreshold(sz)) { addReplyError(c, "argument must be a memory value bigger than 1 and smaller than 4gb"); } else { addReply(c,shared.ok); @@ -905,11 +901,11 @@ NULL full = 1; stats = sdscatprintf(stats,"[Dictionary HT]\n"); - dictGetStats(buf,sizeof(buf),server.db[dbid].dict,full); + kvstoreGetStats(server.db[dbid].keys, buf, sizeof(buf), full); stats = sdscat(stats,buf); stats = sdscatprintf(stats,"[Expires HT]\n"); - dictGetStats(buf,sizeof(buf),server.db[dbid].expires,full); + kvstoreGetStats(server.db[dbid].expires, buf, sizeof(buf), full); stats = sdscat(stats,buf); addReplyVerbatim(c,stats,sdslen(stats),"txt"); @@ -1015,34 +1011,33 @@ NULL return; } addReply(c, shared.ok); - } else if(!strcasecmp(c->argv[1]->ptr,"CLUSTERLINK") && - !strcasecmp(c->argv[2]->ptr,"KILL") && - c->argc == 5) { - if (!server.cluster_enabled) { - addReplyError(c, "Debug option only available for cluster mode enabled setup!"); - return; - } - - /* Find the node. */ - clusterNode *n = clusterLookupNode(c->argv[4]->ptr, sdslen(c->argv[4]->ptr)); - if (!n) { - addReplyErrorFormat(c,"Unknown node %s", (char*)c->argv[4]->ptr); - return; - } - - /* Terminate the link based on the direction or all. 
*/ - if (!strcasecmp(c->argv[3]->ptr,"from")) { - freeClusterLink(n->inbound_link); - } else if (!strcasecmp(c->argv[3]->ptr,"to")) { - freeClusterLink(n->link); - } else if (!strcasecmp(c->argv[3]->ptr,"all")) { - freeClusterLink(n->link); - freeClusterLink(n->inbound_link); + } else if (!strcasecmp(c->argv[1]->ptr, "dict-resizing") && c->argc == 3) { + server.dict_resizing = atoi(c->argv[2]->ptr); + addReply(c, shared.ok); + } else if (!strcasecmp(c->argv[1]->ptr,"script") && c->argc == 3) { + if (!strcasecmp(c->argv[2]->ptr,"list")) { + dictIterator *di = dictGetIterator(getLuaScripts()); + dictEntry *de; + while ((de = dictNext(di)) != NULL) { + luaScript *script = dictGetVal(de); + sds *sha = dictGetKey(de); + serverLog(LL_WARNING, "SCRIPT SHA: %s\n%s", (char*)sha, (char*)script->body->ptr); + } + dictReleaseIterator(di); + } else if (sdslen(c->argv[2]->ptr) == 40) { + dictEntry *de; + if ((de = dictFind(getLuaScripts(), c->argv[2]->ptr)) == NULL) { + addReplyErrorObject(c, shared.noscripterr); + return; + } + luaScript *script = dictGetVal(de); + serverLog(LL_WARNING, "SCRIPT SHA: %s\n%s", (char*)c->argv[2]->ptr, (char*)script->body->ptr); } else { - addReplyErrorFormat(c, "Unknown direction %s", (char*) c->argv[3]->ptr); + addReplySubcommandSyntaxError(c); + return; } addReply(c,shared.ok); - } else { + } else if(!handleDebugClusterCommand(c)) { addReplySubcommandSyntaxError(c); return; } @@ -1050,20 +1045,23 @@ NULL /* =========================== Crash handling ============================== */ +__attribute__ ((noinline)) void _serverAssert(const char *estr, const char *file, int line) { - bugReportStart(); - serverLog(LL_WARNING,"=== ASSERTION FAILED ==="); + int new_report = bugReportStart(); + serverLog(LL_WARNING,"=== %sASSERTION FAILED ===", new_report ? 
"" : "RECURSIVE "); serverLog(LL_WARNING,"==> %s:%d '%s' is not true",file,line,estr); if (server.crashlog_enabled) { #ifdef HAVE_BACKTRACE - logStackTrace(NULL, 1); + logStackTrace(NULL, 1, 0); #endif - printCrashReport(); + /* If this was a recursive assertion, it what most likely generated + * from printCrashReport. */ + if (new_report) printCrashReport(); } // remove the signal handler so on abort() we will output the crash report. - removeSignalHandlers(); + removeSigSegvHandlers(); bugReportEnd(0, 0); } @@ -1116,7 +1114,7 @@ void serverLogObjectDebugInfo(const robj *o) { } else if (o->type == OBJ_SET) { serverLog(LL_WARNING,"Set size: %d", (int) setTypeSize(o)); } else if (o->type == OBJ_HASH) { - serverLog(LL_WARNING,"Hash size: %d", (int) hashTypeLength(o)); + serverLog(LL_WARNING,"Hash size: %d", (int) hashTypeLength(o, 0)); } else if (o->type == OBJ_ZSET) { serverLog(LL_WARNING,"Sorted set size: %d", (int) zsetLength(o)); if (o->encoding == OBJ_ENCODING_SKIPLIST) @@ -1139,6 +1137,7 @@ void _serverAssertWithInfo(const client *c, const robj *o, const char *estr, con _serverAssert(estr,file,line); } +__attribute__ ((noinline)) void _serverPanic(const char *file, int line, const char *msg, ...) { va_list ap; va_start(ap,msg); @@ -1146,31 +1145,37 @@ void _serverPanic(const char *file, int line, const char *msg, ...) { vsnprintf(fmtmsg,sizeof(fmtmsg),msg,ap); va_end(ap); - bugReportStart(); + int new_report = bugReportStart(); serverLog(LL_WARNING,"------------------------------------------------"); serverLog(LL_WARNING,"!!! Software Failure. Press left mouse button to continue"); serverLog(LL_WARNING,"Guru Meditation: %s #%s:%d",fmtmsg,file,line); if (server.crashlog_enabled) { #ifdef HAVE_BACKTRACE - logStackTrace(NULL, 1); + logStackTrace(NULL, 1, 0); #endif - printCrashReport(); + /* If this was a recursive panic, it what most likely generated + * from printCrashReport. 
*/ + if (new_report) printCrashReport(); } // remove the signal handler so on abort() we will output the crash report. - removeSignalHandlers(); + removeSigSegvHandlers(); bugReportEnd(0, 0); } -void bugReportStart(void) { +/* Start a bug report, returning 1 if this is the first time this function was called, 0 otherwise. */ +int bugReportStart(void) { pthread_mutex_lock(&bug_report_start_mutex); if (bug_report_start == 0) { serverLogRaw(LL_WARNING|LL_RAW, "\n\n=== REDIS BUG REPORT START: Cut & paste starting from here ===\n"); bug_report_start = 1; + pthread_mutex_unlock(&bug_report_start_mutex); + return 1; } pthread_mutex_unlock(&bug_report_start_mutex); + return 0; } #ifdef HAVE_BACKTRACE @@ -1190,7 +1195,7 @@ static void* getAndSetMcontextEip(ucontext_t *uc, void *eip) { } \ return old_val; \ } while(0) -#if defined(__APPLE__) && !defined(MAC_OS_X_VERSION_10_6) +#if defined(__APPLE__) && !defined(MAC_OS_10_6_DETECTED) /* OSX < 10.6 */ #if defined(__x86_64__) GET_SET_RETURN(uc->uc_mcontext->__ss.__rip, eip); @@ -1199,7 +1204,7 @@ static void* getAndSetMcontextEip(ucontext_t *uc, void *eip) { #else GET_SET_RETURN(uc->uc_mcontext->__ss.__srr0, eip); #endif -#elif defined(__APPLE__) && defined(MAC_OS_X_VERSION_10_6) +#elif defined(__APPLE__) && defined(MAC_OS_10_6_DETECTED) /* OSX >= 10.6 */ #if defined(_STRUCT_X86_THREAD_STATE64) && !defined(__i386__) GET_SET_RETURN(uc->uc_mcontext->__ss.__rip, eip); @@ -1290,7 +1295,7 @@ void logRegisters(ucontext_t *uc) { } while(0) /* OSX */ -#if defined(__APPLE__) && defined(MAC_OS_X_VERSION_10_6) +#if defined(__APPLE__) && defined(MAC_OS_10_6_DETECTED) /* OSX AMD64 */ #if defined(_STRUCT_X86_THREAD_STATE64) && !defined(__i386__) serverLog(LL_WARNING, @@ -1815,24 +1820,132 @@ void closeDirectLogFiledes(int fd) { if (!log_to_stdout) close(fd); } +#if defined(HAVE_BACKTRACE) && defined(__linux__) +static int stacktrace_pipe[2] = {0}; +static void setupStacktracePipe(void) { + if (-1 == anetPipe(stacktrace_pipe, O_CLOEXEC | 
O_NONBLOCK, O_CLOEXEC | O_NONBLOCK)) { + serverLog(LL_WARNING, "setupStacktracePipe failed: %s", strerror(errno)); + } +} +#else +static void setupStacktracePipe(void) {/* we don't need a pipe to write the stacktraces */} +#endif #ifdef HAVE_BACKTRACE +#define BACKTRACE_MAX_SIZE 100 + +#ifdef __linux__ +#if !defined(_GNU_SOURCE) +#define _GNU_SOURCE +#endif +#include +#include +#include +#include + +#define TIDS_MAX_SIZE 50 +static size_t get_ready_to_signal_threads_tids(int sig_num, pid_t tids[TIDS_MAX_SIZE]); + +typedef struct { + char thread_name[16]; + int trace_size; + pid_t tid; + void *trace[BACKTRACE_MAX_SIZE]; +} stacktrace_data; + +__attribute__ ((noinline)) static void collect_stacktrace_data(void) { + stacktrace_data trace_data = {{0}}; + + /* Get the stack trace first! */ + trace_data.trace_size = backtrace(trace_data.trace, BACKTRACE_MAX_SIZE); + + /* get the thread name */ + prctl(PR_GET_NAME, trace_data.thread_name); + + /* get the thread id */ + trace_data.tid = syscall(SYS_gettid); + + /* Send the output to the main process*/ + if (write(stacktrace_pipe[1], &trace_data, sizeof(trace_data)) == -1) {/* Avoid warning. 
*/}; +} + +__attribute__ ((noinline)) +static void writeStacktraces(int fd, int uplevel) { + /* get the list of all the process's threads that don't block or ignore the THREADS_SIGNAL */ + pid_t tids[TIDS_MAX_SIZE]; + size_t len_tids = get_ready_to_signal_threads_tids(THREADS_SIGNAL, tids); + if (!len_tids) { + serverLogRawFromHandler(LL_WARNING, "writeStacktraces(): Failed to get the process's threads."); + } + + char buff[PIPE_BUF]; + /* Clear the stacktraces pipe */ + while (read(stacktrace_pipe[0], &buff, sizeof(buff)) > 0) {} + + /* ThreadsManager_runOnThreads returns 0 if it is already running */ + if (!ThreadsManager_runOnThreads(tids, len_tids, collect_stacktrace_data)) return; + + size_t collected = 0; + + pid_t calling_tid = syscall(SYS_gettid); + + /* Read the stacktrace_pipe until it's empty */ + stacktrace_data curr_stacktrace_data = {{0}}; + while (read(stacktrace_pipe[0], &curr_stacktrace_data, sizeof(curr_stacktrace_data)) > 0) { + /* stacktrace header includes the tid and the thread's name */ + snprintf_async_signal_safe(buff, sizeof(buff), "\n%d %s", curr_stacktrace_data.tid, curr_stacktrace_data.thread_name); + if (write(fd,buff,strlen(buff)) == -1) {/* Avoid warning. */}; + + /* skip kernel call to the signal handler, the signal handler and the callback addresses */ + int curr_uplevel = 3; + + if (curr_stacktrace_data.tid == calling_tid) { + /* skip signal syscall and ThreadsManager_runOnThreads */ + curr_uplevel += uplevel + 2; + /* Add an indication to header of the thread that is handling the log file */ + if (write(fd," *\n",strlen(" *\n")) == -1) {/* Avoid warning. */}; + } else { + /* just add a new line */ + if (write(fd,"\n",strlen("\n")) == -1) {/* Avoid warning. 
*/}; + } + + /* add the stacktrace */ + backtrace_symbols_fd(curr_stacktrace_data.trace+curr_uplevel, curr_stacktrace_data.trace_size-curr_uplevel, fd); + + ++collected; + } + + snprintf_async_signal_safe(buff, sizeof(buff), "\n%lu/%lu expected stacktraces.\n", (long unsigned)(collected), (long unsigned)len_tids); + if (write(fd,buff,strlen(buff)) == -1) {/* Avoid warning. */}; + +} + +#endif /* __linux__ */ +__attribute__ ((noinline)) +static void writeCurrentThreadsStackTrace(int fd, int uplevel) { + void *trace[BACKTRACE_MAX_SIZE]; + + int trace_size = backtrace(trace, BACKTRACE_MAX_SIZE); + + char *msg = "\nBacktrace:\n"; + if (write(fd,msg,strlen(msg)) == -1) {/* Avoid warning. */}; + backtrace_symbols_fd(trace+uplevel, trace_size-uplevel, fd); +} /* Logs the stack trace using the backtrace() call. This function is designed * to be called from signal handlers safely. * The eip argument is optional (can take NULL). * The uplevel argument indicates how many of the calling functions to skip. + * Functions that are taken in consideration in "uplevel" should be declared with + * __attribute__ ((noinline)) to make sure the compiler won't inline them. */ -void logStackTrace(void *eip, int uplevel) { - void *trace[100]; - int trace_size = 0, fd = openDirectLogFiledes(); +__attribute__ ((noinline)) +void logStackTrace(void *eip, int uplevel, int current_thread) { + int fd = openDirectLogFiledes(); char *msg; uplevel++; /* skip this function */ if (fd == -1) return; /* If we can't log there is anything to do. */ - /* Get the stack trace first! */ - trace_size = backtrace(trace, 100); - msg = "\n------ STACK TRACE ------\n"; if (write(fd,msg,strlen(msg)) == -1) {/* Avoid warning. 
*/}; @@ -1844,9 +1957,21 @@ void logStackTrace(void *eip, int uplevel) { } /* Write symbols to log file */ - msg = "\nBacktrace:\n"; + ++uplevel; +#ifdef __linux__ + if (current_thread) { + writeCurrentThreadsStackTrace(fd, uplevel); + } else { + writeStacktraces(fd, uplevel); + } +#else + /* Outside of linux, we only support writing the current thread. */ + UNUSED(current_thread); + writeCurrentThreadsStackTrace(fd, uplevel); +#endif + msg = "\n------ STACK TRACE DONE ------\n"; if (write(fd,msg,strlen(msg)) == -1) {/* Avoid warning. */}; - backtrace_symbols_fd(trace+uplevel, trace_size-uplevel, fd); + /* Cleanup */ closeDirectLogFiledes(fd); @@ -1855,11 +1980,17 @@ void logStackTrace(void *eip, int uplevel) { #endif /* HAVE_BACKTRACE */ sds genClusterDebugString(sds infostring) { + sds cluster_info = genClusterInfoString(); + sds cluster_nodes = clusterGenNodesDescription(NULL, 0, 0); + infostring = sdscatprintf(infostring, "\r\n# Cluster info\r\n"); - infostring = sdscatsds(infostring, genClusterInfoString()); + infostring = sdscatsds(infostring, cluster_info); infostring = sdscatprintf(infostring, "\n------ CLUSTER NODES OUTPUT ------\n"); - infostring = sdscatsds(infostring, clusterGenNodesDescription(NULL, 0, 0)); - + infostring = sdscatsds(infostring, cluster_nodes); + + sdsfree(cluster_info); + sdsfree(cluster_nodes); + return infostring; } @@ -1936,7 +2067,7 @@ void logCurrentClient(client *cc, const char *title) { dictEntry *de; key = getDecodedObject(cc->argv[1]); - de = dictFind(cc->db->dict, key->ptr); + de = dbFind(cc->db, key->ptr); if (de) { val = dictGetVal(de); serverLog(LL_WARNING,"key '%s' found in DB containing the following object:", (char*)key->ptr); @@ -1961,7 +2092,7 @@ int memtest_test_linux_anonymous_maps(void) { int regions = 0, j; int fd = openDirectLogFiledes(); - if (!fd) return 0; + if (fd == -1) return 0; fp = fopen("/proc/self/maps","r"); if (!fp) { @@ -2116,9 +2247,19 @@ void invalidFunctionWasCalled(void) {} typedef void 
(*invalidFunctionWasCalledType)(void); -void sigsegvHandler(int sig, siginfo_t *info, void *secret) { +__attribute__ ((noinline)) +static void sigsegvHandler(int sig, siginfo_t *info, void *secret) { UNUSED(secret); UNUSED(info); + int print_full_crash_info = 1; + /* Check if it is safe to enter the signal handler. second thread crashing at the same time will deadlock. */ + if(pthread_mutex_lock(&signal_handler_lock) == EDEADLK) { + /* If this thread already owns the lock (meaning we crashed during handling a signal) switch + * to printing the minimal information about the crash. */ + serverLogRawFromHandler(LL_WARNING, + "Crashed running signal handler. Providing reduced version of recursive crash report."); + print_full_crash_info = 0; + } bugReportStart(); serverLog(LL_WARNING, @@ -2151,7 +2292,9 @@ void sigsegvHandler(int sig, siginfo_t *info, void *secret) { getAndSetMcontextEip(uc, ptr); } - logStackTrace(eip, 1); + /* When printing the reduced crash info, just print the current thread + * to avoid race conditions with the multi-threaded stack collector. */ + logStackTrace(eip, 1, !print_full_crash_info); if (eip == info->si_addr) { /* Restore old eip */ @@ -2161,7 +2304,7 @@ void sigsegvHandler(int sig, siginfo_t *info, void *secret) { logRegisters(uc); #endif - printCrashReport(); + if (print_full_crash_info) printCrashReport(); #ifdef HAVE_BACKTRACE if (eip != NULL) @@ -2171,6 +2314,60 @@ void sigsegvHandler(int sig, siginfo_t *info, void *secret) { bugReportEnd(1, sig); } +void setupDebugSigHandlers(void) { + setupStacktracePipe(); + + setupSigSegvHandler(); + + struct sigaction act; + + sigemptyset(&act.sa_mask); + act.sa_flags = SA_SIGINFO; + act.sa_sigaction = sigalrmSignalHandler; + sigaction(SIGALRM, &act, NULL); +} + +void setupSigSegvHandler(void) { + /* Initialize the signal handler lock. + Attempting to initialize an already initialized mutex or mutexattr results in undefined behavior. 
*/ + if (!signal_handler_lock_initialized) { + /* Set signal handler with error checking attribute. re-lock within the same thread will error. */ + pthread_mutexattr_init(&signal_handler_lock_attr); + pthread_mutexattr_settype(&signal_handler_lock_attr, PTHREAD_MUTEX_ERRORCHECK); + pthread_mutex_init(&signal_handler_lock, &signal_handler_lock_attr); + signal_handler_lock_initialized = 1; + } + + struct sigaction act; + + sigemptyset(&act.sa_mask); + /* SA_NODEFER disables adding the signal to the signal mask of the + * calling process on entry to the signal handler unless it is included in the sa_mask field. */ + /* SA_SIGINFO flag is set to raise the function defined in sa_sigaction. + * Otherwise, sa_handler is used. */ + act.sa_flags = SA_NODEFER | SA_SIGINFO; + act.sa_sigaction = sigsegvHandler; + if(server.crashlog_enabled) { + sigaction(SIGSEGV, &act, NULL); + sigaction(SIGBUS, &act, NULL); + sigaction(SIGFPE, &act, NULL); + sigaction(SIGILL, &act, NULL); + sigaction(SIGABRT, &act, NULL); + } +} + +void removeSigSegvHandlers(void) { + struct sigaction act; + sigemptyset(&act.sa_mask); + act.sa_flags = SA_NODEFER | SA_RESETHAND; + act.sa_handler = SIG_DFL; + sigaction(SIGSEGV, &act, NULL); + sigaction(SIGBUS, &act, NULL); + sigaction(SIGFPE, &act, NULL); + sigaction(SIGILL, &act, NULL); + sigaction(SIGABRT, &act, NULL); +} + void printCrashReport(void) { /* Log INFO and CLIENT LIST */ logServerInfo(); @@ -2193,7 +2390,7 @@ void printCrashReport(void) { void bugReportEnd(int killViaSignal, int sig) { struct sigaction act; - serverLogRaw(LL_WARNING|LL_RAW, + serverLogRawFromHandler(LL_WARNING|LL_RAW, "\n=== REDIS BUG REPORT END. Make sure to include from START to END. 
===\n\n" " Please report the crash by opening an issue on github:\n\n" " http://github.com/redis/redis/issues\n\n" @@ -2206,7 +2403,7 @@ void bugReportEnd(int killViaSignal, int sig) { if (server.daemonize && server.supervised == 0 && server.pidfile) unlink(server.pidfile); if (!killViaSignal) { - /* To avoid issues with valgrind, we may wanna exit rahter than generate a signal */ + /* To avoid issues with valgrind, we may wanna exit rather than generate a signal */ if (server.use_exit_on_panic) { /* Using _exit to bypass false leak reports by gcc ASAN */ fflush(stdout); @@ -2218,7 +2415,7 @@ void bugReportEnd(int killViaSignal, int sig) { /* Make sure we exit with the right signal at the end. So for instance * the core will be dumped if enabled. */ sigemptyset (&act.sa_mask); - act.sa_flags = SA_NODEFER | SA_ONSTACK | SA_RESETHAND; + act.sa_flags = 0; act.sa_handler = SIG_DFL; sigaction (sig, &act, NULL); kill(getpid(),sig); @@ -2251,22 +2448,27 @@ void serverLogHexDump(int level, char *descr, void *value, size_t len) { /* =========================== Software Watchdog ============================ */ #include -void watchdogSignalHandler(int sig, siginfo_t *info, void *secret) { +void sigalrmSignalHandler(int sig, siginfo_t *info, void *secret) { #ifdef HAVE_BACKTRACE ucontext_t *uc = (ucontext_t*) secret; #else (void)secret; #endif - UNUSED(info); UNUSED(sig); - serverLogFromHandler(LL_WARNING,"\n--- WATCHDOG TIMER EXPIRED ---"); + /* SIGALRM can be sent explicitly to the process calling kill() to get the stacktraces, + or every watchdog_period interval. 
In the last case, si_pid is not set */ + if(info->si_pid == 0) { + serverLogRawFromHandler(LL_WARNING,"\n--- WATCHDOG TIMER EXPIRED ---"); + } else { + serverLogRawFromHandler(LL_WARNING, "\nReceived SIGALRM"); + } #ifdef HAVE_BACKTRACE - logStackTrace(getAndSetMcontextEip(uc, NULL), 1); + logStackTrace(getAndSetMcontextEip(uc, NULL), 1, 0); #else - serverLogFromHandler(LL_WARNING,"Sorry: no support for backtrace()."); + serverLogRawFromHandler(LL_WARNING,"Sorry: no support for backtrace()."); #endif - serverLogFromHandler(LL_WARNING,"--------\n"); + serverLogRawFromHandler(LL_WARNING,"--------\n"); } /* Schedule a SIGALRM delivery after the specified period in milliseconds. @@ -2284,25 +2486,10 @@ void watchdogScheduleSignal(int period) { setitimer(ITIMER_REAL, &it, NULL); } void applyWatchdogPeriod(void) { - struct sigaction act; - /* Disable watchdog when period is 0 */ if (server.watchdog_period == 0) { watchdogScheduleSignal(0); /* Stop the current timer. */ - - /* Set the signal handler to SIG_IGN, this will also remove pending - * signals from the queue. */ - sigemptyset(&act.sa_mask); - act.sa_flags = 0; - act.sa_handler = SIG_IGN; - sigaction(SIGALRM, &act, NULL); } else { - /* Setup the signal handler. */ - sigemptyset(&act.sa_mask); - act.sa_flags = SA_SIGINFO; - act.sa_sigaction = watchdogSignalHandler; - sigaction(SIGALRM, &act, NULL); - /* If the configured period is smaller than twice the timer period, it is * too short for the software watchdog to work reliably. Fix it now * if needed. */ @@ -2320,3 +2507,145 @@ void debugDelay(int usec) { if (usec < 0) usec = (rand() % -usec) == 0 ? 1: 0; if (usec) usleep(usec); } + +#ifdef HAVE_BACKTRACE +#ifdef __linux__ + +/* =========================== Stacktrace Utils ============================ */ + + + +/** If it doesn't block and doesn't ignore, return 1 (the thread will handle the signal) + * If thread tid blocks or ignores sig_num returns 0 (thread is not ready to catch the signal). 
+ * also returns 0 if something is wrong and prints a warning message to the log file **/ +static int is_thread_ready_to_signal(const char *proc_pid_task_path, const char *tid, int sig_num) { + /* Open the thread's status file path /proc/<pid>/task/<tid>/status */ + char path_buff[PATH_MAX]; + snprintf_async_signal_safe(path_buff, PATH_MAX, "%s/%s/status", proc_pid_task_path, tid); + + int thread_status_file = open(path_buff, O_RDONLY); + char buff[PATH_MAX]; + if (thread_status_file == -1) { + serverLogFromHandler(LL_WARNING, "tid:%s: failed to open %s file", tid, path_buff); + return 0; + } + + int ret = 1; + size_t field_name_len = strlen("SigBlk:\t"); /* SigIgn has the same length */ + char *line = NULL; + size_t fields_count = 2; + while ((line = fgets_async_signal_safe(buff, PATH_MAX, thread_status_file)) && fields_count) { + /* iterate the file until we reach SigBlk or SigIgn field line */ + if (!strncmp(buff, "SigBlk:\t", field_name_len) || !strncmp(buff, "SigIgn:\t", field_name_len)) { + line = buff + field_name_len; + unsigned long sig_mask; + if (-1 == string2ul_base16_async_signal_safe(line, sizeof(buff), &sig_mask)) { + serverLogRawFromHandler(LL_WARNING, "Can't convert signal mask to an unsigned long due to an overflow"); + ret = 0; + break; + } + + /* The bit position in a signal mask aligns with the signal number. 
Since signal numbers start from 1 + we need to adjust the signal number by subtracting 1 to align it correctly with the zero-based indexing used */ + if (sig_mask & (1L << (sig_num - 1))) { /* if the signal is blocked/ignored return 0 */ + ret = 0; + break; + } + --fields_count; + } + } + + close(thread_status_file); + + /* if we reached EOF, it means we haven't found SigBlk or/and SigIgn, something is wrong */ + if (line == NULL) { + ret = 0; + serverLogFromHandler(LL_WARNING, "tid:%s: failed to find SigBlk or/and SigIgn field(s) in %s/%s/status file", tid, proc_pid_task_path, tid); + } + return ret; +} + +/** We are using syscall(SYS_getdents64) to read directories, which unlike opendir(), is considered + * async-signal-safe. This function wrapper getdents64() in glibc is supported as of glibc 2.30. + * To support earlier versions of glibc, we use syscall(SYS_getdents64), which requires defining + * linux_dirent64 ourselves. This structure is very old and stable: It will not change unless the kernel + * chooses to break compatibility with all existing binaries. Highly Unlikely. +*/ +struct linux_dirent64 { + unsigned long long d_ino; + long long d_off; + unsigned short d_reclen; /* Length of this linux_dirent */ + unsigned char d_type; + char d_name[256]; /* Filename (null-terminated) */ +}; + +/** Returns the number of the process's threads that can receive signal sig_num. + * Writes into tids the tids of these threads. + * If it fails, returns 0. +*/ +static size_t get_ready_to_signal_threads_tids(int sig_num, pid_t tids[TIDS_MAX_SIZE]) { + /* Open /proc//task file. */ + char path_buff[PATH_MAX]; + snprintf_async_signal_safe(path_buff, PATH_MAX, "/proc/%d/task", getpid()); + + int dir; + if (-1 == (dir = open(path_buff, O_RDONLY | O_DIRECTORY))) return 0; + + size_t tids_count = 0; + pid_t calling_tid = syscall(SYS_gettid); + int current_thread_index = -1; + long nread; + char buff[PATH_MAX]; + + /* readdir() is not async-signal-safe (AS-safe). 
+ Hence, we read the file using SYS_getdents64, which is considered AS-safe*/ + while ((nread = syscall(SYS_getdents64, dir, buff, PATH_MAX))) { + if (nread == -1) { + close(dir); + serverLogRawFromHandler(LL_WARNING, "get_ready_to_signal_threads_tids(): Failed to read the process's task directory"); + return 0; + } + /* Each thread is represented by a directory */ + for (long pos = 0; pos < nread;) { + struct linux_dirent64 *entry = (struct linux_dirent64 *)(buff + pos); + pos += entry->d_reclen; + /* Skip irrelevant directories. */ + if (strcmp(entry->d_name, ".") == 0 || strcmp(entry->d_name, "..") == 0) continue; + + /* the thread's directory name is equivalent to its tid. */ + long tid; + string2l(entry->d_name, strlen(entry->d_name), &tid); + + if(!is_thread_ready_to_signal(path_buff, entry->d_name, sig_num)) continue; + + if(tid == calling_tid) { + current_thread_index = tids_count; + } + + /* save the thread id */ + tids[tids_count++] = tid; + + /* Stop if we reached the maximum threads number. */ + if(tids_count == TIDS_MAX_SIZE) { + serverLogRawFromHandler(LL_WARNING, "get_ready_to_signal_threads_tids(): Reached the limit of the tids buffer."); + break; + } + } + + if(tids_count == TIDS_MAX_SIZE) break; + } + + /* Swap the last tid with the current thread id */ + if(current_thread_index != -1) { + pid_t last_tid = tids[tids_count - 1]; + + tids[tids_count - 1] = calling_tid; + tids[current_thread_index] = last_tid; + } + + close(dir); + + return tids_count; +} +#endif /* __linux__ */ +#endif /* HAVE_BACKTRACE */ diff --git a/src/debugmacro.h b/src/debugmacro.h index dcd79a33f85..e94b0c0e3bf 100644 --- a/src/debugmacro.h +++ b/src/debugmacro.h @@ -2,32 +2,11 @@ * * ----------------------------------------------------------------------------- * - * Copyright (c) 2016, Salvatore Sanfilippo + * Copyright (c) 2016-Present, Redis Ltd. * All rights reserved. 
* - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Redis nor the names of its contributors may be used - * to endorse or promote products derived from this software without - * specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. + * Licensed under your choice of the Redis Source Available License 2.0 + * (RSALv2) or the Server Side Public License v1 (SSPLv1). */ #ifndef _REDIS_DEBUGMACRO_H_ diff --git a/src/defrag.c b/src/defrag.c index ff63cf8fdec..78de7224867 100644 --- a/src/defrag.c +++ b/src/defrag.c @@ -5,42 +5,28 @@ * We do that by scanning the keyspace and for each pointer we have, we can try to * ask the allocator if moving it to a new address will help reduce fragmentation. 
* - * Copyright (c) 2020, Redis Labs, Inc + * Copyright (c) 2020-Present, Redis Ltd. * All rights reserved. * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Redis nor the names of its contributors may be used - * to endorse or promote products derived from this software without - * specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. + * Licensed under your choice of the Redis Source Available License 2.0 + * (RSALv2) or the Server Side Public License v1 (SSPLv1). 
*/ #include "server.h" -#include "cluster.h" -#include -#include #include #ifdef HAVE_DEFRAG +typedef struct defragCtx { + void *privdata; + int slot; +} defragCtx; + +typedef struct defragPubSubCtx { + kvstore *pubsub_channels; + dict *(*clientPubSubChannels)(client*); +} defragPubSubCtx; + /* this method was added to jemalloc in order to help us understand which * pointers are worthwhile moving and which aren't */ int je_get_defrag_hint(void* ptr); @@ -60,7 +46,7 @@ void* activeDefragAlloc(void *ptr) { /* move this allocation to a new allocation. * make sure not to use the thread cache. so that we don't get back the same * pointers we try to free */ - size = zmalloc_size(ptr); + size = zmalloc_usable_size(ptr); newptr = zmalloc_no_tcache(size); memcpy(newptr, ptr, size); zfree_no_tcache(ptr); @@ -84,14 +70,32 @@ sds activeDefragSds(sds sdsptr) { return NULL; } -/* Defrag helper for robj and/or string objects +/* Defrag helper for hfield strings * * returns NULL in case the allocation wasn't moved. * when it returns a non-null value, the old pointer was already released * and should NOT be accessed. */ -robj *activeDefragStringOb(robj* ob) { +hfield activeDefragHfield(hfield hf) { + void *ptr = hfieldGetAllocPtr(hf); + void *newptr = activeDefragAlloc(ptr); + if (newptr) { + size_t offset = hf - (char*)ptr; + hf = (char*)newptr + offset; + return hf; + } + return NULL; +} + +/* Defrag helper for robj and/or string objects with expected refcount. + * + * Like activeDefragStringOb, but it requires the caller to pass in the expected + * reference count. In some cases, the caller needs to update a robj whose + * reference count is not 1, in these cases, the caller must explicitly pass + * in the reference count, otherwise defragmentation will not be performed. + * Note that the caller is responsible for updating any other references to the robj. 
*/ +robj *activeDefragStringObEx(robj* ob, int expected_refcount) { robj *ret = NULL; - if (ob->refcount!=1) + if (ob->refcount!=expected_refcount) return NULL; /* try to defrag robj (only if not an EMBSTR type (handled below). */ @@ -122,6 +126,15 @@ robj *activeDefragStringOb(robj* ob) { return ret; } +/* Defrag helper for robj and/or string objects + * + * returns NULL in case the allocation wasn't moved. + * when it returns a non-null value, the old pointer was already released + * and should NOT be accessed. */ +robj *activeDefragStringOb(robj* ob) { + return activeDefragStringObEx(ob, 1); +} + /* Defrag helper for lua scripts * * returns NULL in case the allocation wasn't moved. @@ -143,11 +156,20 @@ luaScript *activeDefragLuaScript(luaScript *script) { } /* Defrag helper for dict main allocations (dict struct, and hash tables). - * receives a pointer to the dict* and implicitly updates it when the dict - * struct itself was moved. Returns a stat of how many pointers were moved. */ -void dictDefragTables(dict* d) { + * Receives a pointer to the dict* and return a new dict* when the dict + * struct itself was moved. + * + * Returns NULL in case the allocation wasn't moved. + * When it returns a non-null value, the old pointer was already released + * and should NOT be accessed. 
*/ +dict *dictDefragTables(dict *d) { + dict *ret = NULL; dictEntry **newtable; + /* handle the dict struct */ + if ((ret = activeDefragAlloc(d))) + d = ret; /* handle the first hash table */ + if (!d->ht_table[0]) return ret; /* created but unused */ newtable = activeDefragAlloc(d->ht_table[0]); if (newtable) d->ht_table[0] = newtable; @@ -157,6 +179,7 @@ void dictDefragTables(dict* d) { if (newtable) d->ht_table[1] = newtable; } + return ret; } /* Internal function used by zslDefrag */ @@ -243,6 +266,31 @@ void activeDefragSdsDictCallback(void *privdata, const dictEntry *de) { UNUSED(de); } +void activeDefragHfieldDictCallback(void *privdata, const dictEntry *de) { + dict *d = privdata; + hfield newhf, hf = dictGetKey(de); + + if (hfieldGetExpireTime(hf) == EB_EXPIRE_TIME_INVALID) { + /* If the hfield does not have TTL, we directly defrag it. */ + newhf = activeDefragHfield(hf); + } else { + /* Update its reference in the ebucket while defragging it. */ + ebuckets *eb = hashTypeGetDictMetaHFE(d); + newhf = ebDefragItem(eb, &hashFieldExpireBucketsType, hf, (ebDefragFunction *)activeDefragHfield); + } + if (newhf) { + /* We can't search in dict for that key after we've released + * the pointer it holds, since it won't be able to do the string + * compare, but we can find the entry using key hash and pointer. */ + dictUseStoredKeyApi(d, 1); + uint64_t hash = dictGetHash(d, newhf); + dictUseStoredKeyApi(d, 0); + dictEntry *de = dictFindEntryByPtrAndHash(d, hf, hash); + serverAssert(de); + dictSetKey(d, de, newhf); + } +} + /* Defrag a dict with sds key and optional value (either ptr, sds or robj string) */ void activeDefragSdsDict(dict* d, int val_type) { unsigned long cursor = 0; @@ -261,6 +309,20 @@ void activeDefragSdsDict(dict* d, int val_type) { } while (cursor != 0); } +/* Defrag a dict with hfield key and sds value. 
*/ +void activeDefragHfieldDict(dict *d) { + unsigned long cursor = 0; + dictDefragFunctions defragfns = { + .defragAlloc = activeDefragAlloc, + .defragKey = NULL, /* Will be defragmented in activeDefragHfieldDictCallback. */ + .defragVal = (dictDefragAllocFunction *)activeDefragSds + }; + do { + cursor = dictScanDefrag(d, cursor, activeDefragHfieldDictCallback, + &defragfns, d); + } while (cursor != 0); +} + /* Defrag a list of ptr, sds or robj string values */ void activeDefragList(list *l, int val_type) { listNode *ln, *newln; @@ -415,10 +477,10 @@ void scanLaterHash(robj *ob, unsigned long *cursor) { dict *d = ob->ptr; dictDefragFunctions defragfns = { .defragAlloc = activeDefragAlloc, - .defragKey = (dictDefragAllocFunction *)activeDefragSds, + .defragKey = NULL, /* Will be defragmented in activeDefragHfieldDictCallback. */ .defragVal = (dictDefragAllocFunction *)activeDefragSds }; - *cursor = dictScanDefrag(d, *cursor, scanCallbackCountScanned, &defragfns, NULL); + *cursor = dictScanDefrag(d, *cursor, activeDefragHfieldDictCallback, &defragfns, d); } void defragQuicklist(redisDb *db, dictEntry *kde) { @@ -457,11 +519,9 @@ void defragZsetSkiplist(redisDb *db, dictEntry *kde) { } dictReleaseIterator(di); } - /* handle the dict struct */ - if ((newdict = activeDefragAlloc(zs->dict))) + /* defrag the dict struct and tables */ + if ((newdict = dictDefragTables(zs->dict))) zs->dict = newdict; - /* defrag the dict tables */ - dictDefragTables(zs->dict); } void defragHash(redisDb *db, dictEntry *kde) { @@ -472,12 +532,10 @@ void defragHash(redisDb *db, dictEntry *kde) { if (dictSize(d) > server.active_defrag_max_scan_fields) defragLater(db, kde); else - activeDefragSdsDict(d, DEFRAG_SDS_DICT_VAL_IS_SDS); - /* handle the dict struct */ - if ((newd = activeDefragAlloc(ob->ptr))) + activeDefragHfieldDict(d); + /* defrag the dict struct and tables */ + if ((newd = dictDefragTables(ob->ptr))) ob->ptr = newd; - /* defrag the dict tables */ - dictDefragTables(ob->ptr); } 
void defragSet(redisDb *db, dictEntry *kde) { @@ -489,11 +547,9 @@ void defragSet(redisDb *db, dictEntry *kde) { defragLater(db, kde); else activeDefragSdsDict(d, DEFRAG_SDS_DICT_NO_VAL); - /* handle the dict struct */ - if ((newd = activeDefragAlloc(ob->ptr))) + /* defrag the dict struct and tables */ + if ((newd = dictDefragTables(ob->ptr))) ob->ptr = newd; - /* defrag the dict tables */ - dictDefragTables(ob->ptr); } /* Defrag callback for radix tree iterator, called for each node, @@ -668,32 +724,43 @@ void defragModule(redisDb *db, dictEntry *kde) { } /* for each key we scan in the main dict, this function will attempt to defrag - * all the various pointers it has. Returns a stat of how many pointers were - * moved. */ -void defragKey(redisDb *db, dictEntry *de) { + * all the various pointers it has. */ +void defragKey(defragCtx *ctx, dictEntry *de) { sds keysds = dictGetKey(de); - robj *newob, *ob; + robj *newob, *ob = dictGetVal(de); unsigned char *newzl; sds newsds; - + redisDb *db = ctx->privdata; + int slot = ctx->slot; /* Try to defrag the key name. */ newsds = activeDefragSds(keysds); if (newsds) { - dictSetKey(db->dict, de, newsds); - if (dictSize(db->expires)) { + kvstoreDictSetKey(db->keys, slot, de, newsds); + if (kvstoreDictSize(db->expires, slot)) { /* We can't search in db->expires for that key after we've released * the pointer it holds, since it won't be able to do the string * compare, but we can find the entry using key hash and pointer. */ - uint64_t hash = dictGetHash(db->dict, newsds); - dictEntry *expire_de = dictFindEntryByPtrAndHash(db->expires, keysds, hash); - if (expire_de) dictSetKey(db->expires, expire_de, newsds); + uint64_t hash = kvstoreGetHash(db->expires, newsds); + dictEntry *expire_de = kvstoreDictFindEntryByPtrAndHash(db->expires, slot, keysds, hash); + if (expire_de) kvstoreDictSetKey(db->expires, slot, expire_de, newsds); } + + /* Update the key's reference in the dict's metadata or the listpackEx. 
*/ + if (unlikely(ob->type == OBJ_HASH)) + hashTypeUpdateKeyRef(ob, newsds); } /* Try to defrag robj and / or string value. */ - ob = dictGetVal(de); - if ((newob = activeDefragStringOb(ob))) { - dictSetVal(db->dict, de, newob); + if (unlikely(ob->type == OBJ_HASH && hashTypeGetMinExpire(ob, 0) != EB_EXPIRE_TIME_INVALID)) { + /* Update its reference in the ebucket while defragging it. */ + newob = ebDefragItem(&db->hexpires, &hashExpireBucketsType, ob, + (ebDefragFunction *)activeDefragStringOb); + } else { + /* If the dict doesn't have metadata, we directly defrag it. */ + newob = activeDefragStringOb(ob); + } + if (newob) { + kvstoreDictSetVal(db->keys, slot, de, newob); ob = newob; } @@ -733,6 +800,12 @@ void defragKey(redisDb *db, dictEntry *de) { if (ob->encoding == OBJ_ENCODING_LISTPACK) { if ((newzl = activeDefragAlloc(ob->ptr))) ob->ptr = newzl; + } else if (ob->encoding == OBJ_ENCODING_LISTPACK_EX) { + listpackEx *newlpt, *lpt = (listpackEx*)ob->ptr; + if ((newlpt = activeDefragAlloc(lpt))) + ob->ptr = lpt = newlpt; + if ((newzl = activeDefragAlloc(lpt->lp))) + lpt->lp = newzl; } else if (ob->encoding == OBJ_ENCODING_HT) { defragHash(db, de); } else { @@ -750,7 +823,7 @@ void defragKey(redisDb *db, dictEntry *de) { /* Defrag scan callback for the main db dictionary. */ void defragScanCallback(void *privdata, const dictEntry *de) { long long hits_before = server.stat_active_defrag_hits; - defragKey((redisDb*)privdata, (dictEntry*)de); + defragKey((defragCtx*)privdata, (dictEntry*)de); if (server.stat_active_defrag_hits != hits_before) server.stat_active_defrag_key_hits++; else @@ -765,20 +838,68 @@ void defragScanCallback(void *privdata, const dictEntry *de) { * or not, a false detection can cause the defragmenter to waste a lot of CPU * without the possibility of getting any results. 
*/ float getAllocatorFragmentation(size_t *out_frag_bytes) { - size_t resident, active, allocated; - zmalloc_get_allocator_info(&allocated, &active, &resident); - float frag_pct = ((float)active / allocated)*100 - 100; - size_t frag_bytes = active - allocated; + size_t resident, active, allocated, frag_smallbins_bytes; + zmalloc_get_allocator_info(1, &allocated, &active, &resident, NULL, NULL, &frag_smallbins_bytes); + + if (server.lua_arena != UINT_MAX) { + size_t lua_resident, lua_active, lua_allocated, lua_frag_smallbins_bytes; + zmalloc_get_allocator_info_by_arena(server.lua_arena, 0, &lua_allocated, &lua_active, &lua_resident, &lua_frag_smallbins_bytes); + resident -= lua_resident; + active -= lua_active; + allocated -= lua_allocated; + frag_smallbins_bytes -= lua_frag_smallbins_bytes; + } + + /* Calculate the fragmentation ratio as the proportion of wasted memory in small + * bins (which are defraggable) relative to the total allocated memory (including large bins). + * This is because otherwise, if most of the memory usage is large bins, we may show high percentage, + * despite the fact it's not a lot of memory for the user. */ + float frag_pct = (float)frag_smallbins_bytes / allocated * 100; float rss_pct = ((float)resident / allocated)*100 - 100; size_t rss_bytes = resident - allocated; if(out_frag_bytes) - *out_frag_bytes = frag_bytes; + *out_frag_bytes = frag_smallbins_bytes; serverLog(LL_DEBUG, - "allocated=%zu, active=%zu, resident=%zu, frag=%.0f%% (%.0f%% rss), frag_bytes=%zu (%zu rss)", - allocated, active, resident, frag_pct, rss_pct, frag_bytes, rss_bytes); + "allocated=%zu, active=%zu, resident=%zu, frag=%.2f%% (%.2f%% rss), frag_bytes=%zu (%zu rss)", + allocated, active, resident, frag_pct, rss_pct, frag_smallbins_bytes, rss_bytes); return frag_pct; } +/* Defrag scan callback for the pubsub dictionary. 
*/ +void defragPubsubScanCallback(void *privdata, const dictEntry *de) { + defragCtx *ctx = privdata; + defragPubSubCtx *pubsub_ctx = ctx->privdata; + kvstore *pubsub_channels = pubsub_ctx->pubsub_channels; + robj *newchannel, *channel = dictGetKey(de); + dict *newclients, *clients = dictGetVal(de); + + /* Try to defrag the channel name. */ + serverAssert(channel->refcount == (int)dictSize(clients) + 1); + newchannel = activeDefragStringObEx(channel, dictSize(clients) + 1); + if (newchannel) { + kvstoreDictSetKey(pubsub_channels, ctx->slot, (dictEntry*)de, newchannel); + + /* The channel name is shared by the client's pubsub(shard) and server's + * pubsub(shard), after defragging the channel name, we need to update + * the reference in the clients' dictionary. */ + dictIterator *di = dictGetIterator(clients); + dictEntry *clientde; + while((clientde = dictNext(di)) != NULL) { + client *c = dictGetKey(clientde); + dictEntry *pubsub_channel = dictFind(pubsub_ctx->clientPubSubChannels(c), newchannel); + serverAssert(pubsub_channel); + dictSetKey(pubsub_ctx->clientPubSubChannels(c), pubsub_channel, newchannel); + } + dictReleaseIterator(di); + } + + /* Try to defrag the dictionary of clients that is stored as the value part. */ + if ((newclients = dictDefragTables(clients))) + kvstoreDictSetVal(pubsub_channels, ctx->slot, (dictEntry*)de, newclients); + + server.stat_active_defrag_scanned++; +} + /* We may need to defrag other globals, one small allocation can hold a full allocator run. 
* so although small, it is still important to defrag these */ void defragOtherGlobals(void) { @@ -788,6 +909,8 @@ void defragOtherGlobals(void) { * that remain static for a long time */ activeDefragSdsDict(evalScriptsDict(), DEFRAG_SDS_DICT_VAL_LUA_SCRIPT); moduleDefragGlobals(); + kvstoreDictLUTDefrag(server.pubsub_channels, dictDefragTables); + kvstoreDictLUTDefrag(server.pubsubshard_channels, dictDefragTables); } /* returns 0 more work may or may not be needed (see non-zero cursor), @@ -821,7 +944,7 @@ static sds defrag_later_current_key = NULL; static unsigned long defrag_later_cursor = 0; /* returns 0 if no more work needs to be been done, and 1 if time is up and more work is needed. */ -int defragLaterStep(redisDb *db, long long endtime) { +int defragLaterStep(redisDb *db, int slot, long long endtime) { unsigned int iterations = 0; unsigned long long prev_defragged = server.stat_active_defrag_hits; unsigned long long prev_scanned = server.stat_active_defrag_scanned; @@ -851,7 +974,7 @@ int defragLaterStep(redisDb *db, long long endtime) { } /* each time we enter this function we need to fetch the key from the dict again (if it still exists) */ - dictEntry *de = dictFind(db->dict, defrag_later_current_key); + dictEntry *de = kvstoreDictFind(db->keys, slot, defrag_later_current_key); key_defragged = server.stat_active_defrag_hits; do { int quit = 0; @@ -896,7 +1019,8 @@ void computeDefragCycles(void) { return; } - /* Calculate the adaptive aggressiveness of the defrag */ + /* Calculate the adaptive aggressiveness of the defrag based on the current + * fragmentation and configurations. */ int cpu_pct = INTERPOLATE(frag_pct, server.active_defrag_threshold_lower, server.active_defrag_threshold_upper, @@ -905,10 +1029,15 @@ void computeDefragCycles(void) { cpu_pct = LIMIT(cpu_pct, server.active_defrag_cycle_min, server.active_defrag_cycle_max); - /* We allow increasing the aggressiveness during a scan, but don't - * reduce it. 
*/ - if (cpu_pct > server.active_defrag_running) { + + /* Normally we allow increasing the aggressiveness during a scan, but don't + * reduce it, since we should not lower the aggressiveness when fragmentation + * drops. But when a configuration is made, we should reconsider it. */ + if (cpu_pct > server.active_defrag_running || + server.active_defrag_configuration_changed) + { server.active_defrag_running = cpu_pct; + server.active_defrag_configuration_changed = 0; serverLog(LL_VERBOSE, "Starting active defrag, frag=%.0f%%, frag_bytes=%zu, cpu=%d%%", frag_pct, frag_bytes, cpu_pct); @@ -919,9 +1048,11 @@ void computeDefragCycles(void) { * This works in a similar way to activeExpireCycle, in the sense that * we do incremental work across calls. */ void activeDefragCycle(void) { + static int slot = -1; static int current_db = -1; - static unsigned long cursor = 0; - static unsigned long expires_cursor = 0; + static int defrag_later_item_in_progress = 0; + static int defrag_stage = 0; + static unsigned long defrag_cursor = 0; static redisDb *db = NULL; static long long start_scan, start_stat; unsigned int iterations = 0; @@ -929,18 +1060,23 @@ void activeDefragCycle(void) { unsigned long long prev_scanned = server.stat_active_defrag_scanned; long long start, timelimit, endtime; mstime_t latency; + int all_stages_finished = 0; int quit = 0; if (!server.active_defrag_enabled) { if (server.active_defrag_running) { /* if active defrag was disabled mid-run, start from fresh next time. 
*/ server.active_defrag_running = 0; + server.active_defrag_configuration_changed = 0; if (db) listEmpty(db->defrag_later); defrag_later_current_key = NULL; defrag_later_cursor = 0; current_db = -1; - cursor = 0; + defrag_stage = 0; + defrag_cursor = 0; + slot = -1; + defrag_later_item_in_progress = 0; db = NULL; goto update_metrics; } @@ -955,6 +1091,14 @@ void activeDefragCycle(void) { run_with_period(1000) { computeDefragCycles(); } + + /* Normally it is checked once a second, but when there is a configuration + * change, we want to check it as soon as possible. */ + if (server.active_defrag_configuration_changed) { + computeDefragCycles(); + server.active_defrag_configuration_changed = 0; + } + if (!server.active_defrag_running) return; @@ -968,9 +1112,9 @@ void activeDefragCycle(void) { dictDefragFunctions defragfns = {.defragAlloc = activeDefragAlloc}; do { /* if we're not continuing a scan from the last call or loop, start a new one */ - if (!cursor && !expires_cursor) { + if (!defrag_stage && !defrag_cursor && (slot < 0)) { /* finish any leftovers from previous db before moving to the next one */ - if (db && defragLaterStep(db, endtime)) { + if (db && defragLaterStep(db, slot, endtime)) { quit = 1; /* time is up, we didn't finish all the work */ break; /* this will exit the function and we'll continue on the next cycle */ } @@ -989,7 +1133,10 @@ void activeDefragCycle(void) { start_scan = now; current_db = -1; - cursor = 0; + defrag_stage = 0; + defrag_cursor = 0; + slot = -1; + defrag_later_item_in_progress = 0; db = NULL; server.active_defrag_running = 0; @@ -1005,38 +1152,79 @@ void activeDefragCycle(void) { } db = &server.db[current_db]; - cursor = 0; + kvstoreDictLUTDefrag(db->keys, dictDefragTables); + kvstoreDictLUTDefrag(db->expires, dictDefragTables); + defrag_stage = 0; + defrag_cursor = 0; + slot = -1; + defrag_later_item_in_progress = 0; } + /* This array of structures holds the parameters for all defragmentation stages. 
*/ + typedef struct defragStage { + kvstore *kvs; + dictScanFunction *scanfn; + void *privdata; + } defragStage; + defragStage defrag_stages[] = { + {db->keys, defragScanCallback, db}, + {db->expires, scanCallbackCountScanned, NULL}, + {server.pubsub_channels, defragPubsubScanCallback, + &(defragPubSubCtx){server.pubsub_channels, getClientPubSubChannels}}, + {server.pubsubshard_channels, defragPubsubScanCallback, + &(defragPubSubCtx){server.pubsubshard_channels, getClientPubSubShardChannels}}, + }; do { + int num_stages = sizeof(defrag_stages) / sizeof(defrag_stages[0]); + serverAssert(defrag_stage < num_stages); + defragStage *current_stage = &defrag_stages[defrag_stage]; + /* before scanning the next bucket, see if we have big keys left from the previous bucket to scan */ - if (defragLaterStep(db, endtime)) { + if (defragLaterStep(db, slot, endtime)) { quit = 1; /* time is up, we didn't finish all the work */ break; /* this will exit the function and we'll continue on the next cycle */ } - /* Scan the keyspace dict unless we're scanning the expire dict. */ - if (!expires_cursor) - cursor = dictScanDefrag(db->dict, cursor, defragScanCallback, - &defragfns, db); + if (!defrag_later_item_in_progress) { + /* Continue defragmentation from the previous stage. + * If slot is -1, it means this stage starts from the first non-empty slot. */ + if (slot == -1) slot = kvstoreGetFirstNonEmptyDictIndex(current_stage->kvs); + defrag_cursor = kvstoreDictScanDefrag(current_stage->kvs, slot, defrag_cursor, + current_stage->scanfn, &defragfns, &(defragCtx){current_stage->privdata, slot}); + } + + if (!defrag_cursor) { + /* Move to the next slot only if regular and large item scanning has been completed. */ + if (listLength(db->defrag_later) > 0) { + defrag_later_item_in_progress = 1; + continue; + } - /* When done scanning the keyspace dict, we scan the expire dict. 
*/ - if (!cursor) - expires_cursor = dictScanDefrag(db->expires, expires_cursor, - scanCallbackCountScanned, - &defragfns, NULL); + /* Move to the next slot in the current stage. If we've reached the end, move to the next stage. */ + if ((slot = kvstoreGetNextNonEmptyDictIndex(current_stage->kvs, slot)) == -1) + defrag_stage++; + defrag_later_item_in_progress = 0; + } + /* Check if all defragmentation stages have been processed. + * If so, mark as finished and reset the stage counter to move on to next database. */ + if (defrag_stage == num_stages) { + all_stages_finished = 1; + defrag_stage = 0; + } + /* Once in 16 scan iterations, 512 pointer reallocations. or 64 keys * (if we have a lot of pointers in one hash bucket or rehashing), * check if we reached the time limit. * But regardless, don't start a new db in this loop, this is because after * the last db we call defragOtherGlobals, which must be done in one cycle */ - if (!(cursor || expires_cursor) || + if (all_stages_finished || ++iterations > 16 || server.stat_active_defrag_hits - prev_defragged > 512 || server.stat_active_defrag_scanned - prev_scanned > 64) { - if (!cursor || ustime() > endtime) { + /* Quit if all stages were finished or timeout. */ + if (all_stages_finished || ustime() > endtime) { quit = 1; break; } @@ -1044,7 +1232,7 @@ void activeDefragCycle(void) { prev_defragged = server.stat_active_defrag_hits; prev_scanned = server.stat_active_defrag_scanned; } - } while((cursor || expires_cursor) && !quit); + } while(!all_stages_finished && !quit); } while(!quit); latencyEndMonitor(latency); diff --git a/src/dict.c b/src/dict.c index 6760da1540d..2928d8af5bd 100644 --- a/src/dict.c +++ b/src/dict.c @@ -5,32 +5,11 @@ * tables of power of two in size are used, collisions are handled by * chaining. See the source code for more information... :) * - * Copyright (c) 2006-2012, Salvatore Sanfilippo + * Copyright (c) 2006-Present, Redis Ltd. * All rights reserved. 
* - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Redis nor the names of its contributors may be used - * to endorse or promote products derived from this software without - * specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. + * Licensed under your choice of the Redis Source Available License 2.0 + * (RSALv2) or the Server Side Public License v1 (SSPLv1). */ #include "fmacros.h" @@ -46,20 +25,23 @@ #include "dict.h" #include "zmalloc.h" #include "redisassert.h" +#include "monotonic.h" -/* Using dictEnableResize() / dictDisableResize() we make possible to disable +/* Using dictSetResizeEnabled() we make possible to disable * resizing and rehashing of the hash table as needed. 
This is very important * for Redis, as we use copy-on-write and don't want to move too much memory * around when there is a child performing saving operations. * * Note that even when dict_can_resize is set to DICT_RESIZE_AVOID, not all - * resizes are prevented: a hash table is still allowed to grow if the ratio - * between the number of elements and the buckets > dict_force_resize_ratio. */ + * resizes are prevented: + * - A hash table is still allowed to expand if the ratio between the number + * of elements and the buckets >= dict_force_resize_ratio. + * - A hash table is still allowed to shrink if the ratio between the number + * of elements and the buckets <= 1 / (HASHTABLE_MIN_FILL * dict_force_resize_ratio). */ static dictResizeEnable dict_can_resize = DICT_RESIZE_ENABLE; -static unsigned int dict_force_resize_ratio = 5; +static unsigned int dict_force_resize_ratio = 4; /* -------------------------- types ----------------------------------------- */ - struct dictEntry { void *key; union { @@ -69,9 +51,6 @@ struct dictEntry { double d; } v; struct dictEntry *next; /* Next entry in the same hash bucket. */ - void *metadata[]; /* An arbitrary number of bytes (starting at a - * pointer-aligned address) of size as returned - * by dictType's dictEntryMetadataBytes(). 
*/ }; typedef struct { @@ -81,12 +60,32 @@ typedef struct { /* -------------------------- private prototypes ---------------------------- */ -static int _dictExpandIfNeeded(dict *d); +static void _dictExpandIfNeeded(dict *d); +static void _dictShrinkIfNeeded(dict *d); static signed char _dictNextExp(unsigned long size); static int _dictInit(dict *d, dictType *type); static dictEntry *dictGetNext(const dictEntry *de); static dictEntry **dictGetNextRef(dictEntry *de); static void dictSetNext(dictEntry *de, dictEntry *next); +static int dictDefaultCompare(dict *d, const void *key1, const void *key2); + +/* -------------------------- misc inline functions -------------------------------- */ + +typedef int (*keyCmpFunc)(dict *d, const void *key1, const void *key2); +static inline keyCmpFunc dictGetKeyCmpFunc(dict *d) { + if (d->useStoredKeyApi && d->type->storedKeyCompare) + return d->type->storedKeyCompare; + if (d->type->keyCompare) + return d->type->keyCompare; + return dictDefaultCompare; +} + +static inline uint64_t dictHashKey(dict *d, const void *key, int isStoredKey) { + if (isStoredKey && d->type->storedHashFunction) + return d->type->storedHashFunction(key); + else + return d->type->hashFunction(key); +} /* -------------------------- hash functions -------------------------------- */ @@ -184,16 +183,28 @@ static void _dictReset(dict *d, int htidx) /* Create a new hash table */ dict *dictCreate(dictType *type) { - size_t metasize = type->dictMetadataBytes ? type->dictMetadataBytes() : 0; - dict *d = zmalloc(sizeof(*d) + metasize); - if (metasize) { + size_t metasize = type->dictMetadataBytes ? 
type->dictMetadataBytes(NULL) : 0; + dict *d = zmalloc(sizeof(*d)+metasize); + if (metasize > 0) { memset(dictMetadata(d), 0, metasize); } - _dictInit(d,type); return d; } +/* Change dictType of dict to another one with metadata support + * Rest of dictType's values must stay the same */ +void dictTypeAddMeta(dict **d, dictType *typeWithMeta) { + /* Verify new dictType is compatible with the old one */ + dictType toCmp = *typeWithMeta; + toCmp.dictMetadataBytes = NULL; /* Expected old one not to have metadata */ + toCmp.onDictRelease = (*d)->type->onDictRelease; /* Ignore 'onDictRelease' in comparison */ + assert(memcmp((*d)->type, &toCmp, sizeof(dictType)) == 0); /* The rest of the dictType fields must be the same */ + + *d = zrealloc(*d, sizeof(dict) + typeWithMeta->dictMetadataBytes(*d)); + (*d)->type = typeWithMeta; +} + /* Initialize the hash table */ int _dictInit(dict *d, dictType *type) { @@ -202,33 +213,20 @@ int _dictInit(dict *d, dictType *type) d->type = type; d->rehashidx = -1; d->pauserehash = 0; + d->pauseAutoResize = 0; + d->useStoredKeyApi = 0; return DICT_OK; } -/* Resize the table to the minimal size that contains all the elements, - * but with the invariant of a USED/BUCKETS ratio near to <= 1 */ -int dictResize(dict *d) -{ - unsigned long minimal; - - if (dict_can_resize != DICT_RESIZE_ENABLE || dictIsRehashing(d)) return DICT_ERR; - minimal = d->ht_used[0]; - if (minimal < DICT_HT_INITIAL_SIZE) - minimal = DICT_HT_INITIAL_SIZE; - return dictExpand(d, minimal); -} - -/* Expand or create the hash table, +/* Resize or create the hash table, * when malloc_failed is non-NULL, it'll avoid panic if malloc fails (in which case it'll be set to 1). - * Returns DICT_OK if expand was performed, and DICT_ERR if skipped. */ -int _dictExpand(dict *d, unsigned long size, int* malloc_failed) + * Returns DICT_OK if resize was performed, and DICT_ERR if skipped. 
*/ +int _dictResize(dict *d, unsigned long size, int* malloc_failed) { if (malloc_failed) *malloc_failed = 0; - /* the size is invalid if it is smaller than the number of - * elements already inside the hash table */ - if (dictIsRehashing(d) || d->ht_used[0] > size) - return DICT_ERR; + /* We can't rehash twice if rehashing is ongoing. */ + assert(!dictIsRehashing(d)); /* the new hash table */ dictEntry **new_ht_table; @@ -236,7 +234,7 @@ int _dictExpand(dict *d, unsigned long size, int* malloc_failed) signed char new_ht_size_exp = _dictNextExp(size); /* Detect overflows */ - size_t newsize = 1ul<ht_table[0] == NULL) { + /* Prepare a second hash table for incremental rehashing. + * We do this even for the first initialization, so that we can trigger the + * rehashingStarted more conveniently, we will clean it up right after. */ + d->ht_size_exp[1] = new_ht_size_exp; + d->ht_used[1] = new_ht_used; + d->ht_table[1] = new_ht_table; + d->rehashidx = 0; + if (d->type->rehashingStarted) d->type->rehashingStarted(d); + + /* Is this the first initialization or is the first hash table empty? If so + * it's not really a rehashing, we can just set the first hash table so that + * it can accept keys. 
*/ + if (d->ht_table[0] == NULL || d->ht_used[0] == 0) { + if (d->type->rehashingCompleted) d->type->rehashingCompleted(d); + if (d->ht_table[0]) zfree(d->ht_table[0]); d->ht_size_exp[0] = new_ht_size_exp; d->ht_used[0] = new_ht_used; d->ht_table[0] = new_ht_table; + _dictReset(d, 1); + d->rehashidx = -1; return DICT_OK; } - /* Prepare a second hash table for incremental rehashing */ - d->ht_size_exp[1] = new_ht_size_exp; - d->ht_used[1] = new_ht_used; - d->ht_table[1] = new_ht_table; - d->rehashidx = 0; return DICT_OK; } +int _dictExpand(dict *d, unsigned long size, int* malloc_failed) { + /* the size is invalid if it is smaller than the size of the hash table + * or smaller than the number of elements already inside the hash table */ + if (dictIsRehashing(d) || d->ht_used[0] > size || DICTHT_SIZE(d->ht_size_exp[0]) >= size) + return DICT_ERR; + return _dictResize(d, size, malloc_failed); +} + /* return DICT_ERR if expand was not performed */ int dictExpand(dict *d, unsigned long size) { return _dictExpand(d, size, NULL); @@ -278,11 +293,86 @@ int dictExpand(dict *d, unsigned long size) { /* return DICT_ERR if expand failed due to memory allocation failure */ int dictTryExpand(dict *d, unsigned long size) { - int malloc_failed; + int malloc_failed = 0; _dictExpand(d, size, &malloc_failed); return malloc_failed? DICT_ERR : DICT_OK; } +/* return DICT_ERR if shrink was not performed */ +int dictShrink(dict *d, unsigned long size) { + /* the size is invalid if it is bigger than the size of the hash table + * or smaller than the number of elements already inside the hash table */ + if (dictIsRehashing(d) || d->ht_used[0] > size || DICTHT_SIZE(d->ht_size_exp[0]) <= size) + return DICT_ERR; + return _dictResize(d, size, NULL); +} + +/* Helper function for `dictRehash` and `dictBucketRehash` which rehashes all the keys + * in a bucket at index `idx` from the old to the new hash HT. 
*/ +static void rehashEntriesInBucketAtIndex(dict *d, uint64_t idx) { + dictEntry *de = d->ht_table[0][idx]; + uint64_t h; + dictEntry *nextde; + while (de) { + nextde = dictGetNext(de); + void *key = dictGetKey(de); + /* Get the index in the new hash table */ + if (d->ht_size_exp[1] > d->ht_size_exp[0]) { + h = dictHashKey(d, key, 1) & DICTHT_SIZE_MASK(d->ht_size_exp[1]); + } else { + /* We're shrinking the table. The tables sizes are powers of + * two, so we simply mask the bucket index in the larger table + * to get the bucket index in the smaller table. */ + h = idx & DICTHT_SIZE_MASK(d->ht_size_exp[1]); + } + if (d->type->no_value) { + if (d->type->keys_are_odd && !d->ht_table[1][h]) { + /* Destination bucket is empty and we can store the key + * directly without an allocated entry. Free the old entry + * if it's an allocated entry. + * + * TODO: Add a flag 'keys_are_even' and if set, we can use + * this optimization for these dicts too. We can set the LSB + * bit when stored as a dict entry and clear it again when + * we need the key back. */ + assert(entryIsKey(key)); + if (!entryIsKey(de)) zfree(decodeMaskedPtr(de)); + de = key; + } else if (entryIsKey(de)) { + /* We don't have an allocated entry but we need one. */ + de = createEntryNoValue(key, d->ht_table[1][h]); + } else { + /* Just move the existing entry to the destination table and + * update the 'next' field. 
*/ + assert(entryIsNoValue(de)); + dictSetNext(de, d->ht_table[1][h]); + } + } else { + dictSetNext(de, d->ht_table[1][h]); + } + d->ht_table[1][h] = de; + d->ht_used[0]--; + d->ht_used[1]++; + de = nextde; + } + d->ht_table[0][idx] = NULL; +} + +/* This checks if we already rehashed the whole table and if more rehashing is required */ +static int dictCheckRehashingCompleted(dict *d) { + if (d->ht_used[0] != 0) return 0; + + if (d->type->rehashingCompleted) d->type->rehashingCompleted(d); + zfree(d->ht_table[0]); + /* Copy the new ht onto the old one */ + d->ht_table[0] = d->ht_table[1]; + d->ht_used[0] = d->ht_used[1]; + d->ht_size_exp[0] = d->ht_size_exp[1]; + _dictReset(d, 1); + d->rehashidx = -1; + return 1; +} + /* Performs N steps of incremental rehashing. Returns 1 if there are still * keys to move from the old to the new hash table, otherwise 0 is returned. * @@ -297,16 +387,17 @@ int dictRehash(dict *d, int n) { unsigned long s0 = DICTHT_SIZE(d->ht_size_exp[0]); unsigned long s1 = DICTHT_SIZE(d->ht_size_exp[1]); if (dict_can_resize == DICT_RESIZE_FORBID || !dictIsRehashing(d)) return 0; + /* If dict_can_resize is DICT_RESIZE_AVOID, we want to avoid rehashing. + * - If expanding, the threshold is dict_force_resize_ratio which is 4. + * - If shrinking, the threshold is 1 / (HASHTABLE_MIN_FILL * dict_force_resize_ratio) which is 1/32. 
*/ if (dict_can_resize == DICT_RESIZE_AVOID && - ((s1 > s0 && s1 / s0 < dict_force_resize_ratio) || - (s1 < s0 && s0 / s1 < dict_force_resize_ratio))) + ((s1 > s0 && s1 < dict_force_resize_ratio * s0) || + (s1 < s0 && s0 < HASHTABLE_MIN_FILL * dict_force_resize_ratio * s1))) { return 0; } while(n-- && d->ht_used[0] != 0) { - dictEntry *de, *nextde; - /* Note that rehashidx can't overflow as we are sure there are more * elements because ht[0].used != 0 */ assert(DICTHT_SIZE(d->ht_size_exp[0]) > (unsigned long)d->rehashidx); @@ -314,70 +405,12 @@ int dictRehash(dict *d, int n) { d->rehashidx++; if (--empty_visits == 0) return 1; } - de = d->ht_table[0][d->rehashidx]; /* Move all the keys in this bucket from the old to the new hash HT */ - while(de) { - uint64_t h; - - nextde = dictGetNext(de); - void *key = dictGetKey(de); - /* Get the index in the new hash table */ - if (d->ht_size_exp[1] > d->ht_size_exp[0]) { - h = dictHashKey(d, key) & DICTHT_SIZE_MASK(d->ht_size_exp[1]); - } else { - /* We're shrinking the table. The tables sizes are powers of - * two, so we simply mask the bucket index in the larger table - * to get the bucket index in the smaller table. */ - h = d->rehashidx & DICTHT_SIZE_MASK(d->ht_size_exp[1]); - } - if (d->type->no_value) { - if (d->type->keys_are_odd && !d->ht_table[1][h]) { - /* Destination bucket is empty and we can store the key - * directly without an allocated entry. Free the old entry - * if it's an allocated entry. - * - * TODO: Add a flag 'keys_are_even' and if set, we can use - * this optimization for these dicts too. We can set the LSB - * bit when stored as a dict entry and clear it again when - * we need the key back. */ - assert(entryIsKey(key)); - if (!entryIsKey(de)) zfree(decodeMaskedPtr(de)); - de = key; - } else if (entryIsKey(de)) { - /* We don't have an allocated entry but we need one. 
*/ - de = createEntryNoValue(key, d->ht_table[1][h]); - } else { - /* Just move the existing entry to the destination table and - * update the 'next' field. */ - assert(entryIsNoValue(de)); - dictSetNext(de, d->ht_table[1][h]); - } - } else { - dictSetNext(de, d->ht_table[1][h]); - } - d->ht_table[1][h] = de; - d->ht_used[0]--; - d->ht_used[1]++; - de = nextde; - } - d->ht_table[0][d->rehashidx] = NULL; + rehashEntriesInBucketAtIndex(d, d->rehashidx); d->rehashidx++; } - /* Check if we already rehashed the whole table... */ - if (d->ht_used[0] == 0) { - zfree(d->ht_table[0]); - /* Copy the new ht onto the old one */ - d->ht_table[0] = d->ht_table[1]; - d->ht_used[0] = d->ht_used[1]; - d->ht_size_exp[0] = d->ht_size_exp[1]; - _dictReset(d, 1); - d->rehashidx = -1; - return 0; - } - - /* More to rehash... */ - return 1; + return !dictCheckRehashingCompleted(d); } long long timeInMilliseconds(void) { @@ -387,18 +420,19 @@ long long timeInMilliseconds(void) { return (((long long)tv.tv_sec)*1000)+(tv.tv_usec/1000); } -/* Rehash in ms+"delta" milliseconds. The value of "delta" is larger - * than 0, and is smaller than 1 in most cases. The exact upper bound +/* Rehash in us+"delta" microseconds. The value of "delta" is larger + * than 0, and is smaller than 1000 in most cases. The exact upper bound * depends on the running time of dictRehash(d,100).*/ -int dictRehashMilliseconds(dict *d, int ms) { +int dictRehashMicroseconds(dict *d, uint64_t us) { if (d->pauserehash > 0) return 0; - long long start = timeInMilliseconds(); + monotime timer; + elapsedStart(&timer); int rehashes = 0; while(dictRehash(d,100)) { rehashes += 100; - if (timeInMilliseconds()-start > ms) break; + if (elapsedUs(timer) >= us) break; } return rehashes; } @@ -415,9 +449,24 @@ static void _dictRehashStep(dict *d) { if (d->pauserehash == 0) dictRehash(d,1); } -/* Return a pointer to the metadata section within the dict. 
*/ -void *dictMetadata(dict *d) { - return &d->metadata; +/* Performs rehashing on a single bucket. */ +int _dictBucketRehash(dict *d, uint64_t idx) { + if (d->pauserehash != 0) return 0; + unsigned long s0 = DICTHT_SIZE(d->ht_size_exp[0]); + unsigned long s1 = DICTHT_SIZE(d->ht_size_exp[1]); + if (dict_can_resize == DICT_RESIZE_FORBID || !dictIsRehashing(d)) return 0; + /* If dict_can_resize is DICT_RESIZE_AVOID, we want to avoid rehashing. + * - If expanding, the threshold is dict_force_resize_ratio which is 4. + * - If shrinking, the threshold is 1 / (HASHTABLE_MIN_FILL * dict_force_resize_ratio) which is 1/32. */ + if (dict_can_resize == DICT_RESIZE_AVOID && + ((s1 > s0 && s1 < dict_force_resize_ratio * s0) || + (s1 < s0 && s0 < HASHTABLE_MIN_FILL * dict_force_resize_ratio * s1))) + { + return 0; + } + rehashEntriesInBucketAtIndex(d, idx); + dictCheckRehashingCompleted(d); + return 1; } /* Add an element to the target hash table */ @@ -472,9 +521,7 @@ dictEntry *dictInsertAtPosition(dict *d, void *key, void *position) { int htidx = dictIsRehashing(d) ? 1 : 0; assert(bucket >= &d->ht_table[htidx][0] && bucket <= &d->ht_table[htidx][DICTHT_SIZE_MASK(d->ht_size_exp[htidx])]); - size_t metasize = dictEntryMetadataSize(d); if (d->type->no_value) { - assert(!metasize); /* Entry metadata + no value not supported. */ if (d->type->keys_are_odd && !*bucket) { /* We can store the key directly in the destination bucket without the * allocated entry. @@ -494,11 +541,8 @@ dictEntry *dictInsertAtPosition(dict *d, void *key, void *position) { * Insert the element in top, with the assumption that in a database * system it is more likely that recently added entries are accessed * more frequently. 
*/ - entry = zmalloc(sizeof(*entry) + metasize); + entry = zmalloc(sizeof(*entry)); assert(entryIsNormal(entry)); /* Check alignment of allocation */ - if (metasize > 0) { - memset(dictEntryMetadata(entry), 0, metasize); - } entry->key = key; entry->next = *bucket; } @@ -561,16 +605,31 @@ static dictEntry *dictGenericDelete(dict *d, const void *key, int nofree) { /* dict is empty */ if (dictSize(d) == 0) return NULL; - if (dictIsRehashing(d)) _dictRehashStep(d); - h = dictHashKey(d, key); + h = dictHashKey(d, key, d->useStoredKeyApi); + idx = h & DICTHT_SIZE_MASK(d->ht_size_exp[0]); + + if (dictIsRehashing(d)) { + if ((long)idx >= d->rehashidx && d->ht_table[0][idx]) { + /* If we have a valid hash entry at `idx` in ht0, we perform + * rehash on the bucket at `idx` (being more CPU cache friendly) */ + _dictBucketRehash(d, idx); + } else { + /* If the hash entry is not in ht0, we rehash the buckets based + * on the rehashidx (not CPU cache friendly). */ + _dictRehashStep(d); + } + } + + keyCmpFunc cmpFunc = dictGetKeyCmpFunc(d); for (table = 0; table <= 1; table++) { + if (table == 0 && (long)idx < d->rehashidx) continue; idx = h & DICTHT_SIZE_MASK(d->ht_size_exp[table]); he = d->ht_table[table][idx]; prevHe = NULL; while(he) { void *he_key = dictGetKey(he); - if (key == he_key || dictCompareKeys(d, key, he_key)) { + if (key == he_key || cmpFunc(d, key, he_key)) { /* Unlink the element from the list */ if (prevHe) dictSetNext(prevHe, dictGetNext(he)); @@ -580,6 +639,7 @@ static dictEntry *dictGenericDelete(dict *d, const void *key, int nofree) { dictFreeUnlinkedEntry(d, he); } d->ht_used[table]--; + _dictShrinkIfNeeded(d); return he; } prevHe = he; @@ -660,6 +720,14 @@ int _dictClear(dict *d, int htidx, void(callback)(dict*)) { /* Clear & Release the hash table */ void dictRelease(dict *d) { + /* Someone may be monitoring a dict that started rehashing, before + * destroying the dict fake completion. 
*/ + if (dictIsRehashing(d) && d->type->rehashingCompleted) + d->type->rehashingCompleted(d); + + if (d->type->onDictRelease) + d->type->onDictRelease(d); + _dictClear(d,0,NULL); _dictClear(d,1,NULL); zfree(d); @@ -671,14 +739,30 @@ dictEntry *dictFind(dict *d, const void *key) uint64_t h, idx, table; if (dictSize(d) == 0) return NULL; /* dict is empty */ - if (dictIsRehashing(d)) _dictRehashStep(d); - h = dictHashKey(d, key); + + h = dictHashKey(d, key, d->useStoredKeyApi); + idx = h & DICTHT_SIZE_MASK(d->ht_size_exp[0]); + keyCmpFunc cmpFunc = dictGetKeyCmpFunc(d); + + if (dictIsRehashing(d)) { + if ((long)idx >= d->rehashidx && d->ht_table[0][idx]) { + /* If we have a valid hash entry at `idx` in ht0, we perform + * rehash on the bucket at `idx` (being more CPU cache friendly) */ + _dictBucketRehash(d, idx); + } else { + /* If the hash entry is not in ht0, we rehash the buckets based + * on the rehashidx (not CPU cache friendly). */ + _dictRehashStep(d); + } + } + for (table = 0; table <= 1; table++) { + if (table == 0 && (long)idx < d->rehashidx) continue; idx = h & DICTHT_SIZE_MASK(d->ht_size_exp[table]); he = d->ht_table[table][idx]; while(he) { void *he_key = dictGetKey(he); - if (key == he_key || dictCompareKeys(d, key, he_key)) + if (key == he_key || cmpFunc(d, key, he_key)) return he; he = dictGetNext(he); } @@ -715,14 +799,17 @@ dictEntry *dictTwoPhaseUnlinkFind(dict *d, const void *key, dictEntry ***plink, if (dictSize(d) == 0) return NULL; /* dict is empty */ if (dictIsRehashing(d)) _dictRehashStep(d); - h = dictHashKey(d, key); + + h = dictHashKey(d, key, d->useStoredKeyApi); + keyCmpFunc cmpFunc = dictGetKeyCmpFunc(d); for (table = 0; table <= 1; table++) { idx = h & DICTHT_SIZE_MASK(d->ht_size_exp[table]); + if (table == 0 && (long)idx < d->rehashidx) continue; dictEntry **ref = &d->ht_table[table][idx]; while (ref && *ref) { void *de_key = dictGetKey(*ref); - if (key == de_key || dictCompareKeys(d, key, de_key)) { + if (key == de_key || cmpFunc(d, 
key, de_key)) { *table_index = table; *plink = ref; dictPauseRehashing(d); @@ -742,6 +829,7 @@ void dictTwoPhaseUnlinkFree(dict *d, dictEntry *he, dictEntry **plink, int table dictFreeKey(d, he); dictFreeVal(d, he); if (!entryIsKey(he)) zfree(decodeMaskedPtr(he)); + _dictShrinkIfNeeded(d); dictResumeRehashing(d); } @@ -788,12 +876,6 @@ double dictIncrDoubleVal(dictEntry *de, double val) { return de->v.d += val; } -/* A pointer to the metadata section within the dict entry. */ -void *dictEntryMetadata(dictEntry *de) { - assert(entryHasValue(de)); - return &de->metadata; -} - void *dictGetKey(const dictEntry *de) { if (entryIsKey(de)) return (void*)de; if (entryIsNoValue(de)) return decodeEntryNoValue(de)->key; @@ -856,7 +938,7 @@ static void dictSetNext(dictEntry *de, dictEntry *next) { * and values. */ size_t dictMemUsage(const dict *d) { return dictSize(d) * sizeof(dictEntry) + - dictSlots(d) * sizeof(dictEntry*); + dictBuckets(d) * sizeof(dictEntry*); } size_t dictEntryMemUsage(void) { @@ -950,6 +1032,11 @@ dictEntry *dictNext(dictIterator *iter) dictPauseRehashing(iter->d); else iter->fingerprint = dictFingerprint(iter->d); + + /* skip the rehashed slots in table[0] */ + if (dictIsRehashing(iter->d)) { + iter->index = iter->d->rehashidx - 1; + } } iter->index++; if (iter->index >= (long) DICTHT_SIZE(iter->d->ht_size_exp[iter->table])) { @@ -995,7 +1082,7 @@ dictEntry *dictGetRandomKey(dict *d) do { /* We are sure there are no elements in indexes from 0 * to rehashidx-1 */ - h = d->rehashidx + (randomULong() % (dictSlots(d) - d->rehashidx)); + h = d->rehashidx + (randomULong() % (dictBuckets(d) - d->rehashidx)); he = (h >= s0) ? d->ht_table[1][h - s0] : d->ht_table[0][h]; } while(he == NULL); } else { @@ -1127,7 +1214,7 @@ unsigned int dictGetSomeKeys(dict *d, dictEntry **des, unsigned int count) { /* Reallocate the dictEntry, key and value allocations in a bucket using the * provided allocation functions in order to defrag them. 
*/ -static void dictDefragBucket(dict *d, dictEntry **bucketref, dictDefragFunctions *defragfns) { +static void dictDefragBucket(dictEntry **bucketref, dictDefragFunctions *defragfns) { dictDefragAllocFunction *defragalloc = defragfns->defragAlloc; dictDefragAllocFunction *defragkey = defragfns->defragKey; dictDefragAllocFunction *defragval = defragfns->defragVal; @@ -1154,8 +1241,6 @@ static void dictDefragBucket(dict *d, dictEntry **bucketref, dictDefragFunctions } if (newde) { *bucketref = newde; - if (d->type->afterReplaceEntry) - d->type->afterReplaceEntry(d, newde); } bucketref = dictGetNextRef(*bucketref); } @@ -1318,7 +1403,7 @@ unsigned long dictScanDefrag(dict *d, /* Emit entries at cursor */ if (defragfns) { - dictDefragBucket(d, &d->ht_table[htidx0][v & m0], defragfns); + dictDefragBucket(&d->ht_table[htidx0][v & m0], defragfns); } de = d->ht_table[htidx0][v & m0]; while (de) { @@ -1351,7 +1436,7 @@ unsigned long dictScanDefrag(dict *d, /* Emit entries at cursor */ if (defragfns) { - dictDefragBucket(d, &d->ht_table[htidx0][v & m0], defragfns); + dictDefragBucket(&d->ht_table[htidx0][v & m0], defragfns); } de = d->ht_table[htidx0][v & m0]; while (de) { @@ -1365,7 +1450,7 @@ unsigned long dictScanDefrag(dict *d, do { /* Emit entries at cursor */ if (defragfns) { - dictDefragBucket(d, &d->ht_table[htidx1][v & m1], defragfns); + dictDefragBucket(&d->ht_table[htidx1][v & m1], defragfns); } de = d->ht_table[htidx1][v & m1]; while (de) { @@ -1392,52 +1477,92 @@ unsigned long dictScanDefrag(dict *d, /* ------------------------- private functions ------------------------------ */ /* Because we may need to allocate huge memory chunk at once when dict - * expands, we will check this allocation is allowed or not if the dict - * type has expandAllowed member function. 
*/ -static int dictTypeExpandAllowed(dict *d) { - if (d->type->expandAllowed == NULL) return 1; - return d->type->expandAllowed( - DICTHT_SIZE(_dictNextExp(d->ht_used[0] + 1)) * sizeof(dictEntry*), + * resizes, we will check this allocation is allowed or not if the dict + * type has resizeAllowed member function. */ +static int dictTypeResizeAllowed(dict *d, size_t size) { + if (d->type->resizeAllowed == NULL) return 1; + return d->type->resizeAllowed( + DICTHT_SIZE(_dictNextExp(size)) * sizeof(dictEntry*), (double)d->ht_used[0] / DICTHT_SIZE(d->ht_size_exp[0])); } -/* Expand the hash table if needed */ -static int _dictExpandIfNeeded(dict *d) -{ +/* Returning DICT_OK indicates a successful expand or the dictionary is undergoing rehashing, + * and there is nothing else we need to do about this dictionary currently. While DICT_ERR indicates + * that expand has not been triggered (may be try shrinking?)*/ +int dictExpandIfNeeded(dict *d) { /* Incremental rehashing already in progress. Return. */ if (dictIsRehashing(d)) return DICT_OK; /* If the hash table is empty expand it to the initial size. */ - if (DICTHT_SIZE(d->ht_size_exp[0]) == 0) return dictExpand(d, DICT_HT_INITIAL_SIZE); + if (DICTHT_SIZE(d->ht_size_exp[0]) == 0) { + dictExpand(d, DICT_HT_INITIAL_SIZE); + return DICT_OK; + } /* If we reached the 1:1 ratio, and we are allowed to resize the hash * table (global setting) or we should avoid it but the ratio between * elements/buckets is over the "safe" threshold, we resize doubling * the number of buckets. 
*/ - if (!dictTypeExpandAllowed(d)) - return DICT_OK; if ((dict_can_resize == DICT_RESIZE_ENABLE && d->ht_used[0] >= DICTHT_SIZE(d->ht_size_exp[0])) || (dict_can_resize != DICT_RESIZE_FORBID && - d->ht_used[0] / DICTHT_SIZE(d->ht_size_exp[0]) > dict_force_resize_ratio)) + d->ht_used[0] >= dict_force_resize_ratio * DICTHT_SIZE(d->ht_size_exp[0]))) { - return dictExpand(d, d->ht_used[0] + 1); + if (dictTypeResizeAllowed(d, d->ht_used[0] + 1)) + dictExpand(d, d->ht_used[0] + 1); + return DICT_OK; } - return DICT_OK; + return DICT_ERR; +} + +/* Expand the hash table if needed */ +static void _dictExpandIfNeeded(dict *d) { + /* Automatic resizing is disallowed. Return */ + if (d->pauseAutoResize > 0) return; + + dictExpandIfNeeded(d); +} + +/* Returning DICT_OK indicates a successful shrinking or the dictionary is undergoing rehashing, + * and there is nothing else we need to do about this dictionary currently. While DICT_ERR indicates + * that shrinking has not been triggered (may be try expanding?)*/ +int dictShrinkIfNeeded(dict *d) { + /* Incremental rehashing already in progress. Return. */ + if (dictIsRehashing(d)) return DICT_OK; + + /* If the size of hash table is DICT_HT_INITIAL_SIZE, don't shrink it. */ + if (DICTHT_SIZE(d->ht_size_exp[0]) <= DICT_HT_INITIAL_SIZE) return DICT_OK; + + /* If we reached below 1:8 elements/buckets ratio, and we are allowed to resize + * the hash table (global setting) or we should avoid it but the ratio is below 1:32, + * we'll trigger a resize of the hash table. 
*/ + if ((dict_can_resize == DICT_RESIZE_ENABLE && + d->ht_used[0] * HASHTABLE_MIN_FILL <= DICTHT_SIZE(d->ht_size_exp[0])) || + (dict_can_resize != DICT_RESIZE_FORBID && + d->ht_used[0] * HASHTABLE_MIN_FILL * dict_force_resize_ratio <= DICTHT_SIZE(d->ht_size_exp[0]))) + { + if (dictTypeResizeAllowed(d, d->ht_used[0])) + dictShrink(d, d->ht_used[0]); + return DICT_OK; + } + return DICT_ERR; +} + +static void _dictShrinkIfNeeded(dict *d) +{ + /* Automatic resizing is disallowed. Return */ + if (d->pauseAutoResize > 0) return; + + dictShrinkIfNeeded(d); } -/* TODO: clz optimization */ /* Our hash table capability is a power of two */ static signed char _dictNextExp(unsigned long size) { - unsigned char e = DICT_HT_INITIAL_EXP; - + if (size <= DICT_HT_INITIAL_SIZE) return DICT_HT_INITIAL_EXP; if (size >= LONG_MAX) return (8*sizeof(long)-1); - while(1) { - if (((unsigned long)1<= size) - return e; - e++; - } + + return 8*sizeof(long) - __builtin_clzl(size-1); } /* Finds and returns the position within the dict where the provided key should @@ -1447,20 +1572,34 @@ static signed char _dictNextExp(unsigned long size) void *dictFindPositionForInsert(dict *d, const void *key, dictEntry **existing) { unsigned long idx, table; dictEntry *he; - uint64_t hash = dictHashKey(d, key); + uint64_t hash = dictHashKey(d, key, d->useStoredKeyApi); if (existing) *existing = NULL; - if (dictIsRehashing(d)) _dictRehashStep(d); + idx = hash & DICTHT_SIZE_MASK(d->ht_size_exp[0]); + + if (dictIsRehashing(d)) { + if ((long)idx >= d->rehashidx && d->ht_table[0][idx]) { + /* If we have a valid hash entry at `idx` in ht0, we perform + * rehash on the bucket at `idx` (being more CPU cache friendly) */ + _dictBucketRehash(d, idx); + } else { + /* If the hash entry is not in ht0, we rehash the buckets based + * on the rehashidx (not CPU cache friendly). 
*/ + _dictRehashStep(d); + } + } /* Expand the hash table if needed */ - if (_dictExpandIfNeeded(d) == DICT_ERR) - return NULL; + _dictExpandIfNeeded(d); + keyCmpFunc cmpFunc = dictGetKeyCmpFunc(d); + for (table = 0; table <= 1; table++) { + if (table == 0 && (long)idx < d->rehashidx) continue; idx = hash & DICTHT_SIZE_MASK(d->ht_size_exp[table]); /* Search if this slot does not already contain the given key */ he = d->ht_table[table][idx]; while(he) { void *he_key = dictGetKey(he); - if (key == he_key || dictCompareKeys(d, key, he_key)) { + if (key == he_key || cmpFunc(d, key, he_key)) { if (existing) *existing = he; return NULL; } @@ -1476,10 +1615,15 @@ void *dictFindPositionForInsert(dict *d, const void *key, dictEntry **existing) } void dictEmpty(dict *d, void(callback)(dict*)) { + /* Someone may be monitoring a dict that started rehashing, before + * destroying the dict fake completion. */ + if (dictIsRehashing(d) && d->type->rehashingCompleted) + d->type->rehashingCompleted(d); _dictClear(d,0,callback); _dictClear(d,1,callback); d->rehashidx = -1; d->pauserehash = 0; + d->pauseAutoResize = 0; } void dictSetResizeEnabled(dictResizeEnable enable) { @@ -1487,7 +1631,7 @@ void dictSetResizeEnabled(dictResizeEnable enable) { } uint64_t dictGetHash(dict *d, const void *key) { - return dictHashKey(d, key); + return dictHashKey(d, key, d->useStoredKeyApi); } /* Finds the dictEntry using pointer and pre-calculated hash. 
@@ -1502,6 +1646,7 @@ dictEntry *dictFindEntryByPtrAndHash(dict *d, const void *oldptr, uint64_t hash) if (dictSize(d) == 0) return NULL; /* dict is empty */ for (table = 0; table <= 1; table++) { idx = hash & DICTHT_SIZE_MASK(d->ht_size_exp[table]); + if (table == 0 && (long)idx < d->rehashidx) continue; he = d->ht_table[table][idx]; while(he) { if (oldptr == dictGetKey(he)) @@ -1513,78 +1658,97 @@ dictEntry *dictFindEntryByPtrAndHash(dict *d, const void *oldptr, uint64_t hash) return NULL; } -/* ------------------------------- Debugging ---------------------------------*/ +/* Provides the old and new ht size for a given dictionary during rehashing. This method + * should only be invoked during initialization/rehashing. */ +void dictRehashingInfo(dict *d, unsigned long long *from_size, unsigned long long *to_size) { + /* Invalid method usage if rehashing isn't ongoing. */ + assert(dictIsRehashing(d)); + *from_size = DICTHT_SIZE(d->ht_size_exp[0]); + *to_size = DICTHT_SIZE(d->ht_size_exp[1]); +} +/* ------------------------------- Debugging ---------------------------------*/ #define DICT_STATS_VECTLEN 50 -size_t _dictGetStatsHt(char *buf, size_t bufsize, dict *d, int htidx, int full) { - unsigned long i, slots = 0, chainlen, maxchainlen = 0; - unsigned long totchainlen = 0; - unsigned long clvector[DICT_STATS_VECTLEN]; - size_t l = 0; - - if (d->ht_used[htidx] == 0) { - return snprintf(buf,bufsize, - "Hash table %d stats (%s):\n" - "No stats available for empty dictionaries\n", - htidx, (htidx == 0) ? "main hash table" : "rehashing target"); - } - - if (!full) { - l += snprintf(buf+l,bufsize-l, - "Hash table %d stats (%s):\n" - " table size: %lu\n" - " number of elements: %lu\n", - htidx, (htidx == 0) ? "main hash table" : "rehashing target", - DICTHT_SIZE(d->ht_size_exp[htidx]), d->ht_used[htidx]); - - /* Make sure there is a NULL term at the end. */ - buf[bufsize-1] = '\0'; - /* Unlike snprintf(), return the number of characters actually written. 
*/ - return strlen(buf); +void dictFreeStats(dictStats *stats) { + zfree(stats->clvector); + zfree(stats); +} + +void dictCombineStats(dictStats *from, dictStats *into) { + into->buckets += from->buckets; + into->maxChainLen = (from->maxChainLen > into->maxChainLen) ? from->maxChainLen : into->maxChainLen; + into->totalChainLen += from->totalChainLen; + into->htSize += from->htSize; + into->htUsed += from->htUsed; + for (int i = 0; i < DICT_STATS_VECTLEN; i++) { + into->clvector[i] += from->clvector[i]; } +} +dictStats *dictGetStatsHt(dict *d, int htidx, int full) { + unsigned long *clvector = zcalloc(sizeof(unsigned long) * DICT_STATS_VECTLEN); + dictStats *stats = zcalloc(sizeof(dictStats)); + stats->htidx = htidx; + stats->clvector = clvector; + stats->htSize = DICTHT_SIZE(d->ht_size_exp[htidx]); + stats->htUsed = d->ht_used[htidx]; + if (!full) return stats; /* Compute stats. */ - for (i = 0; i < DICT_STATS_VECTLEN; i++) clvector[i] = 0; - for (i = 0; i < DICTHT_SIZE(d->ht_size_exp[htidx]); i++) { + for (unsigned long i = 0; i < DICTHT_SIZE(d->ht_size_exp[htidx]); i++) { dictEntry *he; if (d->ht_table[htidx][i] == NULL) { clvector[0]++; continue; } - slots++; + stats->buckets++; /* For each hash entry on this slot... */ - chainlen = 0; + unsigned long chainlen = 0; he = d->ht_table[htidx][i]; while(he) { chainlen++; he = dictGetNext(he); } clvector[(chainlen < DICT_STATS_VECTLEN) ? chainlen : (DICT_STATS_VECTLEN-1)]++; - if (chainlen > maxchainlen) maxchainlen = chainlen; - totchainlen += chainlen; + if (chainlen > stats->maxChainLen) stats->maxChainLen = chainlen; + stats->totalChainLen += chainlen; } - /* Generate human readable stats. */ - l += snprintf(buf+l,bufsize-l, - "Hash table %d stats (%s):\n" - " table size: %lu\n" - " number of elements: %lu\n" - " different slots: %lu\n" - " max chain length: %lu\n" - " avg chain length (counted): %.02f\n" - " avg chain length (computed): %.02f\n" - " Chain length distribution:\n", - htidx, (htidx == 0) ? 
"main hash table" : "rehashing target", - DICTHT_SIZE(d->ht_size_exp[htidx]), d->ht_used[htidx], slots, maxchainlen, - (float)totchainlen/slots, (float)d->ht_used[htidx]/slots); - - for (i = 0; i < DICT_STATS_VECTLEN-1; i++) { - if (clvector[i] == 0) continue; - if (l >= bufsize) break; - l += snprintf(buf+l,bufsize-l, - " %ld: %ld (%.02f%%)\n", - i, clvector[i], ((float)clvector[i]/DICTHT_SIZE(d->ht_size_exp[htidx]))*100); + return stats; +} + +/* Generates human readable stats. */ +size_t dictGetStatsMsg(char *buf, size_t bufsize, dictStats *stats, int full) { + if (stats->htUsed == 0) { + return snprintf(buf,bufsize, + "Hash table %d stats (%s):\n" + "No stats available for empty dictionaries\n", + stats->htidx, (stats->htidx == 0) ? "main hash table" : "rehashing target"); + } + size_t l = 0; + l += snprintf(buf + l, bufsize - l, + "Hash table %d stats (%s):\n" + " table size: %lu\n" + " number of elements: %lu\n", + stats->htidx, (stats->htidx == 0) ? "main hash table" : "rehashing target", + stats->htSize, stats->htUsed); + if (full) { + l += snprintf(buf + l, bufsize - l, + " different slots: %lu\n" + " max chain length: %lu\n" + " avg chain length (counted): %.02f\n" + " avg chain length (computed): %.02f\n" + " Chain length distribution:\n", + stats->buckets, stats->maxChainLen, + (float) stats->totalChainLen / stats->buckets, (float) stats->htUsed / stats->buckets); + + for (unsigned long i = 0; i < DICT_STATS_VECTLEN - 1; i++) { + if (stats->clvector[i] == 0) continue; + if (l >= bufsize) break; + l += snprintf(buf + l, bufsize - l, + " %ld: %ld (%.02f%%)\n", + i, stats->clvector[i], ((float) stats->clvector[i] / stats->htSize) * 100); + } } /* Make sure there is a NULL term at the end. 
*/ @@ -1598,22 +1762,32 @@ void dictGetStats(char *buf, size_t bufsize, dict *d, int full) { char *orig_buf = buf; size_t orig_bufsize = bufsize; - l = _dictGetStatsHt(buf,bufsize,d,0,full); - if (dictIsRehashing(d) && bufsize > l) { - buf += l; - bufsize -= l; - _dictGetStatsHt(buf,bufsize,d,1,full); + dictStats *mainHtStats = dictGetStatsHt(d, 0, full); + l = dictGetStatsMsg(buf, bufsize, mainHtStats, full); + dictFreeStats(mainHtStats); + buf += l; + bufsize -= l; + if (dictIsRehashing(d) && bufsize > 0) { + dictStats *rehashHtStats = dictGetStatsHt(d, 1, full); + dictGetStatsMsg(buf, bufsize, rehashHtStats, full); + dictFreeStats(rehashHtStats); } /* Make sure there is a NULL term at the end. */ orig_buf[orig_bufsize-1] = '\0'; } +static int dictDefaultCompare(dict *d, const void *key1, const void *key2) { + (void)(d); /*unused*/ + return key1 == key2; +} + /* ------------------------------- Benchmark ---------------------------------*/ #ifdef REDIS_TEST #include "testhelp.h" #define UNUSED(V) ((void) V) +#define TEST(name) printf("test — %s\n", name); uint64_t hashCallback(const void *key) { return dictGenHashFunction((unsigned char*)key, strlen((char*)key)); @@ -1667,8 +1841,10 @@ dictType BenchmarkDictType = { int dictTest(int argc, char **argv, int flags) { long j; long long start, elapsed; + int retval; dict *dict = dictCreate(&BenchmarkDictType); long count = 0; + unsigned long new_dict_size, current_dict_used, remain_keys; int accurate = (flags & REDIS_TEST_ACCURATE); if (argc == 4) { @@ -1681,9 +1857,135 @@ int dictTest(int argc, char **argv, int flags) { count = 5000; } + TEST("Add 16 keys and verify dict resize is ok") { + dictSetResizeEnabled(DICT_RESIZE_ENABLE); + for (j = 0; j < 16; j++) { + retval = dictAdd(dict,stringFromLongLong(j),(void*)j); + assert(retval == DICT_OK); + } + while (dictIsRehashing(dict)) dictRehashMicroseconds(dict,1000); + assert(dictSize(dict) == 16); + assert(dictBuckets(dict) == 16); + } + + TEST("Use DICT_RESIZE_AVOID to 
disable the dict resize and pad to (dict_force_resize_ratio * 16)") { + /* Use DICT_RESIZE_AVOID to disable the dict resize, and pad + * the number of keys to (dict_force_resize_ratio * 16), so we can satisfy + * dict_force_resize_ratio in next test. */ + dictSetResizeEnabled(DICT_RESIZE_AVOID); + for (j = 16; j < (long)dict_force_resize_ratio * 16; j++) { + retval = dictAdd(dict,stringFromLongLong(j),(void*)j); + assert(retval == DICT_OK); + } + current_dict_used = dict_force_resize_ratio * 16; + assert(dictSize(dict) == current_dict_used); + assert(dictBuckets(dict) == 16); + } + + TEST("Add one more key, trigger the dict resize") { + retval = dictAdd(dict,stringFromLongLong(current_dict_used),(void*)(current_dict_used)); + assert(retval == DICT_OK); + current_dict_used++; + new_dict_size = 1UL << _dictNextExp(current_dict_used); + assert(dictSize(dict) == current_dict_used); + assert(DICTHT_SIZE(dict->ht_size_exp[0]) == 16); + assert(DICTHT_SIZE(dict->ht_size_exp[1]) == new_dict_size); + + /* Wait for rehashing. */ + dictSetResizeEnabled(DICT_RESIZE_ENABLE); + while (dictIsRehashing(dict)) dictRehashMicroseconds(dict,1000); + assert(dictSize(dict) == current_dict_used); + assert(DICTHT_SIZE(dict->ht_size_exp[0]) == new_dict_size); + assert(DICTHT_SIZE(dict->ht_size_exp[1]) == 0); + } + + TEST("Delete keys until we can trigger shrink in next test") { + /* Delete keys until we can satisfy (1 / HASHTABLE_MIN_FILL) in the next test. 
*/ + for (j = new_dict_size / HASHTABLE_MIN_FILL + 1; j < (long)current_dict_used; j++) { + char *key = stringFromLongLong(j); + retval = dictDelete(dict, key); + zfree(key); + assert(retval == DICT_OK); + } + current_dict_used = new_dict_size / HASHTABLE_MIN_FILL + 1; + assert(dictSize(dict) == current_dict_used); + assert(DICTHT_SIZE(dict->ht_size_exp[0]) == new_dict_size); + assert(DICTHT_SIZE(dict->ht_size_exp[1]) == 0); + } + + TEST("Delete one more key, trigger the dict resize") { + current_dict_used--; + char *key = stringFromLongLong(current_dict_used); + retval = dictDelete(dict, key); + zfree(key); + unsigned long oldDictSize = new_dict_size; + new_dict_size = 1UL << _dictNextExp(current_dict_used); + assert(retval == DICT_OK); + assert(dictSize(dict) == current_dict_used); + assert(DICTHT_SIZE(dict->ht_size_exp[0]) == oldDictSize); + assert(DICTHT_SIZE(dict->ht_size_exp[1]) == new_dict_size); + + /* Wait for rehashing. */ + while (dictIsRehashing(dict)) dictRehashMicroseconds(dict,1000); + assert(dictSize(dict) == current_dict_used); + assert(DICTHT_SIZE(dict->ht_size_exp[0]) == new_dict_size); + assert(DICTHT_SIZE(dict->ht_size_exp[1]) == 0); + } + + TEST("Empty the dictionary and add 128 keys") { + dictEmpty(dict, NULL); + for (j = 0; j < 128; j++) { + retval = dictAdd(dict,stringFromLongLong(j),(void*)j); + assert(retval == DICT_OK); + } + while (dictIsRehashing(dict)) dictRehashMicroseconds(dict,1000); + assert(dictSize(dict) == 128); + assert(dictBuckets(dict) == 128); + } + + TEST("Use DICT_RESIZE_AVOID to disable the dict resize and reduce to 3") { + /* Use DICT_RESIZE_AVOID to disable the dict reset, and reduce + * the number of keys until we can trigger shrinking in next test. 
*/ + dictSetResizeEnabled(DICT_RESIZE_AVOID); + remain_keys = DICTHT_SIZE(dict->ht_size_exp[0]) / (HASHTABLE_MIN_FILL * dict_force_resize_ratio) + 1; + for (j = remain_keys; j < 128; j++) { + char *key = stringFromLongLong(j); + retval = dictDelete(dict, key); + zfree(key); + assert(retval == DICT_OK); + } + current_dict_used = remain_keys; + assert(dictSize(dict) == remain_keys); + assert(dictBuckets(dict) == 128); + } + + TEST("Delete one more key, trigger the dict resize") { + current_dict_used--; + char *key = stringFromLongLong(current_dict_used); + retval = dictDelete(dict, key); + zfree(key); + new_dict_size = 1UL << _dictNextExp(current_dict_used); + assert(retval == DICT_OK); + assert(dictSize(dict) == current_dict_used); + assert(DICTHT_SIZE(dict->ht_size_exp[0]) == 128); + assert(DICTHT_SIZE(dict->ht_size_exp[1]) == new_dict_size); + + /* Wait for rehashing. */ + dictSetResizeEnabled(DICT_RESIZE_ENABLE); + while (dictIsRehashing(dict)) dictRehashMicroseconds(dict,1000); + assert(dictSize(dict) == current_dict_used); + assert(DICTHT_SIZE(dict->ht_size_exp[0]) == new_dict_size); + assert(DICTHT_SIZE(dict->ht_size_exp[1]) == 0); + } + + TEST("Restore to original state") { + dictEmpty(dict, NULL); + dictSetResizeEnabled(DICT_RESIZE_ENABLE); + } + start_benchmark(); for (j = 0; j < count; j++) { - int retval = dictAdd(dict,stringFromLongLong(j),(void*)j); + retval = dictAdd(dict,stringFromLongLong(j),(void*)j); assert(retval == DICT_OK); } end_benchmark("Inserting"); @@ -1691,7 +1993,7 @@ int dictTest(int argc, char **argv, int flags) { /* Wait for rehashing. 
*/ while (dictIsRehashing(dict)) { - dictRehashMilliseconds(dict,100); + dictRehashMicroseconds(dict,100*1000); } start_benchmark(); @@ -1741,7 +2043,7 @@ int dictTest(int argc, char **argv, int flags) { start_benchmark(); for (j = 0; j < count; j++) { char *key = stringFromLongLong(j); - int retval = dictDelete(dict,key); + retval = dictDelete(dict,key); assert(retval == DICT_OK); key[0] += 17; /* Change first number to letter. */ retval = dictAdd(dict,key,(void*)j); diff --git a/src/dict.h b/src/dict.h index e96cd44eb19..1c0e6accd32 100644 --- a/src/dict.h +++ b/src/dict.h @@ -5,32 +5,11 @@ * tables of power of two in size are used, collisions are handled by * chaining. See the source code for more information... :) * - * Copyright (c) 2006-2012, Salvatore Sanfilippo + * Copyright (c) 2006-Present, Redis Ltd. * All rights reserved. * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Redis nor the names of its contributors may be used - * to endorse or promote products derived from this software without - * specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. + * Licensed under your choice of the Redis Source Available License 2.0 + * (RSALv2) or the Server Side Public License v1 (SSPLv1). */ #ifndef __DICT_H @@ -44,18 +23,33 @@ #define DICT_OK 0 #define DICT_ERR 1 -typedef struct dictEntry dictEntry; /* opaque */ +/* Hash table parameters */ +#define HASHTABLE_MIN_FILL 8 /* Minimal hash table fill 12.5%(100/8) */ +typedef struct dictEntry dictEntry; /* opaque */ typedef struct dict dict; typedef struct dictType { + /* Callbacks */ uint64_t (*hashFunction)(const void *key); void *(*keyDup)(dict *d, const void *key); void *(*valDup)(dict *d, const void *obj); int (*keyCompare)(dict *d, const void *key1, const void *key2); void (*keyDestructor)(dict *d, void *key); void (*valDestructor)(dict *d, void *obj); - int (*expandAllowed)(size_t moreMem, double usedRatio); + int (*resizeAllowed)(size_t moreMem, double usedRatio); + /* Invoked at the start of dict initialization/rehashing (old and new ht are already created) */ + void (*rehashingStarted)(dict *d); + /* Invoked at the end of dict initialization/rehashing of all the entries from old to new ht. Both ht still exists + * and are cleaned up after this callback. */ + void (*rehashingCompleted)(dict *d); + /* Allow a dict to carry extra caller-defined metadata. The + * extra memory is initialized to 0 when a dict is allocated. 
*/ + size_t (*dictMetadataBytes)(dict *d); + + /* Data */ + void *userdata; + /* Flags */ /* The 'no_value' flag, if set, indicates that values are not used, i.e. the * dict is a set. When this flag is set, it's not possible to access the @@ -68,14 +62,32 @@ typedef struct dictType { unsigned int keys_are_odd:1; /* TODO: Add a 'keys_are_even' flag and use a similar optimization if that * flag is set. */ + /* Sometimes we want the ability to store a key in a given way inside the hash + * function, and lookup it in some other way without resorting to any kind of + * conversion. For instance the key may be stored as a structure also + * representing other things, but the lookup happens via just a pointer to a + * null terminated string. Optionally providing additional hash/cmp functions, + * dict supports such usage. In that case we'll have a hashFunction() that will + * expect a null terminated C string, and a storedHashFunction() that will + * instead expect the structure. Similarly, the two comparison functions will + * work differently. The keyCompare() will treat the first argument as a pointer + * to a C string and the other as a structure (this way we can directly lookup + * the structure key using the C string). While the storedKeyCompare() will + * check if two pointers to the key in structure form are the same. + * + * However, functions of dict that gets key as argument (void *key) don't get + * any indication whether it is a lookup or stored key. To indicate that + * you intend to use key of type stored-key, and, consequently, use + * dedicated compare and hash functions of stored-key, is by calling + * dictUseStoredKeyApi(1) before using any of the dict functions that gets + * key as a parameter and then call again dictUseStoredKeyApi(0) once done. + * + * Set to NULL both functions, if you don't want to support this feature. 
*/ + uint64_t (*storedHashFunction)(const void *key); + int (*storedKeyCompare)(dict *d, const void *key1, const void *key2); - /* Allow each dict and dictEntry to carry extra caller-defined metadata. The - * extra memory is initialized to 0 when allocated. */ - size_t (*dictEntryMetadataBytes)(dict *d); - size_t (*dictMetadataBytes)(void); - /* Optional callback called after an entry has been reallocated (due to - * active defrag). Only called if the entry has metadata. */ - void (*afterReplaceEntry)(dict *d, dictEntry *entry); + /* Optional callback called when the dict is destroyed. */ + void (*onDictRelease)(dict *d); } dictType; #define DICTHT_SIZE(exp) ((exp) == -1 ? 0 : (unsigned long)1<<(exp)) @@ -90,12 +102,12 @@ struct dict { long rehashidx; /* rehashing not in progress if rehashidx == -1 */ /* Keep small vars at end for optimal (minimal) struct padding */ - int16_t pauserehash; /* If >0 rehashing is paused (<0 indicates coding error) */ - signed char ht_size_exp[2]; /* exponent of size. (size = 1<0 rehashing is paused */ - void *metadata[]; /* An arbitrary number of bytes (starting at a - * pointer-aligned address) of size as defined - * by dictType's dictEntryBytes. */ + unsigned useStoredKeyApi : 1; /* See comment of storedHashFunction above */ + signed char ht_size_exp[2]; /* exponent of size. 
(size = 1<0 automatic resizing is disallowed (<0 indicates coding error) */ + void *metadata[]; }; /* If safe is set to 1 this is a safe iterator, that means, you can call @@ -111,6 +123,16 @@ typedef struct dictIterator { unsigned long long fingerprint; } dictIterator; +typedef struct dictStats { + int htidx; + unsigned long buckets; + unsigned long maxChainLen; + unsigned long totalChainLen; + unsigned long htSize; + unsigned long htUsed; + unsigned long *clvector; +} dictStats; + typedef void (dictScanFunction)(void *privdata, const dictEntry *de); typedef void *(dictDefragAllocFunction)(void *ptr); typedef struct { @@ -138,17 +160,20 @@ typedef struct { (d)->type->keyCompare((d), key1, key2) : \ (key1) == (key2)) -#define dictEntryMetadataSize(d) ((d)->type->dictEntryMetadataBytes \ - ? (d)->type->dictEntryMetadataBytes(d) : 0) -#define dictMetadataSize(d) ((d)->type->dictMetadataBytes \ - ? (d)->type->dictMetadataBytes() : 0) +#define dictMetadata(d) (&(d)->metadata) +#define dictMetadataSize(d) ((d)->type->dictMetadataBytes \ + ? (d)->type->dictMetadataBytes(d) : 0) -#define dictHashKey(d, key) ((d)->type->hashFunction(key)) -#define dictSlots(d) (DICTHT_SIZE((d)->ht_size_exp[0])+DICTHT_SIZE((d)->ht_size_exp[1])) +#define dictBuckets(d) (DICTHT_SIZE((d)->ht_size_exp[0])+DICTHT_SIZE((d)->ht_size_exp[1])) #define dictSize(d) ((d)->ht_used[0]+(d)->ht_used[1]) +#define dictIsEmpty(d) ((d)->ht_used[0] == 0 && (d)->ht_used[1] == 0) #define dictIsRehashing(d) ((d)->rehashidx != -1) #define dictPauseRehashing(d) ((d)->pauserehash++) #define dictResumeRehashing(d) ((d)->pauserehash--) +#define dictIsRehashingPaused(d) ((d)->pauserehash > 0) +#define dictPauseAutoResize(d) ((d)->pauseAutoResize++) +#define dictResumeAutoResize(d) ((d)->pauseAutoResize--) +#define dictUseStoredKeyApi(d, flag) ((d)->useStoredKeyApi = (flag)) /* If our unsigned long type can store a 64 bit number, use a 64 bit PRNG. 
*/ #if ULONG_MAX >= 0xffffffffffffffff @@ -165,9 +190,10 @@ typedef enum { /* API */ dict *dictCreate(dictType *type); +void dictTypeAddMeta(dict **d, dictType *typeWithMeta); int dictExpand(dict *d, unsigned long size); int dictTryExpand(dict *d, unsigned long size); -void *dictMetadata(dict *d); +int dictShrink(dict *d, unsigned long size); int dictAdd(dict *d, void *key, void *val); dictEntry *dictAddRaw(dict *d, void *key, dictEntry **existing); void *dictFindPositionForInsert(dict *d, const void *key, dictEntry **existing); @@ -182,7 +208,8 @@ void dictTwoPhaseUnlinkFree(dict *d, dictEntry *he, dictEntry **plink, int table void dictRelease(dict *d); dictEntry * dictFind(dict *d, const void *key); void *dictFetchValue(dict *d, const void *key); -int dictResize(dict *d); +int dictShrinkIfNeeded(dict *d); +int dictExpandIfNeeded(dict *d); void dictSetKey(dict *d, dictEntry* de, void *key); void dictSetVal(dict *d, dictEntry *de, void *val); void dictSetSignedIntegerVal(dictEntry *de, int64_t val); @@ -216,13 +243,19 @@ uint64_t dictGenCaseHashFunction(const unsigned char *buf, size_t len); void dictEmpty(dict *d, void(callback)(dict*)); void dictSetResizeEnabled(dictResizeEnable enable); int dictRehash(dict *d, int n); -int dictRehashMilliseconds(dict *d, int ms); +int dictRehashMicroseconds(dict *d, uint64_t us); void dictSetHashFunctionSeed(uint8_t *seed); uint8_t *dictGetHashFunctionSeed(void); unsigned long dictScan(dict *d, unsigned long v, dictScanFunction *fn, void *privdata); unsigned long dictScanDefrag(dict *d, unsigned long v, dictScanFunction *fn, dictDefragFunctions *defragfns, void *privdata); uint64_t dictGetHash(dict *d, const void *key); dictEntry *dictFindEntryByPtrAndHash(dict *d, const void *oldptr, uint64_t hash); +void dictRehashingInfo(dict *d, unsigned long long *from_size, unsigned long long *to_size); + +size_t dictGetStatsMsg(char *buf, size_t bufsize, dictStats *stats, int full); +dictStats* dictGetStatsHt(dict *d, int htidx, int 
full); +void dictCombineStats(dictStats *from, dictStats *into); +void dictFreeStats(dictStats *stats); #ifdef REDIS_TEST int dictTest(int argc, char *argv[], int flags); diff --git a/src/ebuckets.c b/src/ebuckets.c new file mode 100644 index 00000000000..f4f88fadee4 --- /dev/null +++ b/src/ebuckets.c @@ -0,0 +1,2440 @@ +/* + * Copyright Redis Ltd. 2024 - present + * + * Licensed under your choice of the Redis Source Available License 2.0 (RSALv2) + * or the Server Side Public License v1 (SSPLv1). + */ + +#include +#include +#include +#include +#include "zmalloc.h" +#include "redisassert.h" +#include "config.h" +#include "ebuckets.h" + +#define UNUSED(x) (void)(x) + + +/*** DEBUGGING & VALIDATION + * + * To validate DS on add(), remove() and ebExpire() + * #define EB_VALIDATE_DEBUG 1 + */ + +#if (REDIS_TEST || EB_VALIDATE_DEBUG) && !defined(EB_TEST_BENCHMARK) +#define EB_VALIDATE_STRUCTURE(eb, type) ebValidate(eb, type) +#else +#define EB_VALIDATE_STRUCTURE(eb, type) // Do nothing +#endif + +/*** BENCHMARK + * + * To benchmark ebuckets creation and active-expire with 10 million items, apply + * the following command such that `EB_TEST_BENCHMARK` gets desired distribution + * of expiration times: + * + * # 0=1msec, 1=1sec, 2=1min, 3=1hour, 4=1day, 5=1week, 6=1month + * make REDIS_CFLAGS='-DREDIS_TEST -DEB_TEST_BENCHMARK=3' && ./src/redis-server test ebuckets + */ + +/* + * Keep just enough bytes of bucket-key, taking into consideration configured + * EB_BUCKET_KEY_PRECISION, and ignoring LSB bits that has no impact. + * + * The main motivation is that since the bucket-key size determines the maximum + * depth of the rax tree, then we can prune the tree to be more shallow and thus + * reduce the maintenance and traversal of each node in the B-tree. 
+ */ +#if EB_BUCKET_KEY_PRECISION < 8 +#define EB_KEY_SIZE 6 +#elif EB_BUCKET_KEY_PRECISION >= 8 && EB_BUCKET_KEY_PRECISION < 16 +#define EB_KEY_SIZE 5 +#else +#define EB_KEY_SIZE 4 +#endif + +/* + * EB_SEG_MAX_ITEMS - Maximum number of items in rax-segment before trying to + * split. To simplify, it has the same value as EB_LIST_MAX_ITEMS. + */ +#define EB_SEG_MAX_ITEMS 16 +#define EB_LIST_MAX_ITEMS EB_SEG_MAX_ITEMS + +/* From expiration time to bucket-key */ +#define EB_BUCKET_KEY(exptime) ((exptime) >> EB_BUCKET_KEY_PRECISION) + + /* From bucket-key to expiration time */ +#define EB_BUCKET_EXP_TIME(bucketKey) ((uint64_t)(bucketKey) << EB_BUCKET_KEY_PRECISION) + +/*** structs ***/ + +typedef struct CommonSegHdr { + eItem head; +} CommonSegHdr; + + +/* FirstSegHdr - Header of first segment of a bucket. + * + * A bucket in rax tree with a single segment will be as follows: + * + * +-------------+ +------------+ +------------+ + * | FirstSegHdr | | eItem(1) | | eItem(N) | + * [rax] --> | eItem head | --> | void *next | --> ... --> | void *next | --+ + * +-------------+ +------------+ +------------+ | + * ^ | + * | | + * +-------------------------------------------------------+ + * + * Note that the cyclic references assist to update locally the segment(s) without + * the need to "heavy" traversal of the rax tree for each change. 
+ */ +typedef struct FirstSegHdr { + eItem head; /* first item in the list */ + uint32_t totalItems; /* total items in the bucket, across chained segments */ + uint32_t numSegs; /* number of segments in the bucket */ +} FirstSegHdr; + +/* NextSegHdr - Header of next segment in an extended-segment (bucket) + * + * Here is the layout of an extended-segment, after adding another item to a single, + * full (EB_SEG_MAX_ITEMS=16), segment (all items must have same bucket-key value): + * + * +-------------+ +------------+ +------------+ +------------+ +------------+ + * | FirstSegHdr | | eItem(17) | | NextSegHdr | | eItem(1) | | eItem(16) | + * [rax] --> | eItem head | --> | void *next | --> | eItem head | --> | void *next | --> ... --> | void *next | --+ + * +-------------+ +------------+ +------------+ +------------+ +------------+ | + * ^ | ^ | + * | | | | + * +------------- firstSeg / prevSeg -+ +------------------------------------------------------+ + */ +typedef struct NextSegHdr { + eItem head; + CommonSegHdr *prevSeg; /* pointer to previous segment */ + FirstSegHdr *firstSeg; /* pointer to first segment of the bucket */ +} NextSegHdr; + +/* Selective copy of ifndef from server.h instead of including it */ +#ifndef static_assert +#define static_assert(expr, lit) extern char __static_assert_failure[(expr) ? 1:-1] +#endif +/* Verify that "head" field is aligned in FirstSegHdr, NextSegHdr and CommonSegHdr */ +static_assert(offsetof(FirstSegHdr, head) == 0, "FirstSegHdr head is not aligned"); +static_assert(offsetof(NextSegHdr, head) == 0, "FirstSegHdr head is not aligned"); +static_assert(offsetof(CommonSegHdr, head) == 0, "FirstSegHdr head is not aligned"); +/* Verify attached metadata to rax is aligned */ +static_assert(offsetof(rax, metadata) % sizeof(void*) == 0, "metadata field is not aligned in rax"); + +/* EBucketNew - Indicates the caller to create a new bucket following the addition + * of another item to a bucket (either single-segment or extended-segment). 
*/ +typedef struct EBucketNew { + FirstSegHdr segment; + ExpireMeta *mLast; /* last item in the chain */ + uint64_t ebKey; +} EBucketNew; + +static void ebNewBucket(EbucketsType *type, EBucketNew *newBucket, eItem item, uint64_t key); +static int ebBucketPrint(uint64_t bucketKey, EbucketsType *type, FirstSegHdr *firstSeg); +static uint64_t *ebRaxNumItems(rax *rax); + +/*** Static functions ***/ + +/* Extract pointer to list from ebuckets handler */ +static inline rax *ebGetRaxPtr(ebuckets eb) { return (rax *)eb; } + +/* The lsb in ebuckets pointer determines whether the pointer points to rax or list. */ +static inline int ebIsList(ebuckets eb) { + return (((uintptr_t)(void *)eb & 0x1) == 1); +} +/* set lsb in ebuckets pointer to 1 to mark it as list. Unless empty (NULL) */ +static inline ebuckets ebMarkAsList(eItem item) { + if (item == NULL) return item; + + /* either 'itemsAddrAreOdd' or not, we end up with lsb is set to 1 */ + return (void *) ((uintptr_t) item | 1); +} + +/* Extract pointer to the list from ebuckets handler */ +static inline eItem ebGetListPtr(EbucketsType *type, ebuckets eb) { + /* if 'itemsAddrAreOdd' then no need to reset lsb bit */ + if (type->itemsAddrAreOdd) + return eb; + else + return (void*)((uintptr_t)(eb) & ~1); +} + +/* Converts the logical starting time value of a given bucket-key to its equivalent + * "physical" value in the context of an rax tree (rax-key). Although their values + * are the same, their memory layouts differ. The raxKey layout orders bytes in + * memory is from the MSB to the LSB, and the length of the key is EB_KEY_SIZE. */ +static inline void bucketKey2RaxKey(uint64_t bucketKey, unsigned char *raxKey) { + for (int i = EB_KEY_SIZE-1; i >= 0; --i) { + raxKey[i] = (unsigned char) (bucketKey & 0xFF); + bucketKey >>= 8; + } +} + +/* Converts the "physical" value of rax-key to its logical counterpart, representing + * the starting time value of a bucket. The values are equivalent, but their memory + * layouts differ. 
The raxKey is assumed to be ordered from the MSB to the LSB with + * a length of EB_KEY_SIZE. The resulting bucket-key is the logical representation + * with respect to ebuckets. */ +static inline uint64_t raxKey2BucketKey(unsigned char *raxKey) { + uint64_t bucketKey = 0; + for (int i = 0; i < EB_KEY_SIZE ; ++i) + bucketKey = (bucketKey<<8) + raxKey[i]; + return bucketKey; +} + +/* Add another item to a bucket that consists of extended-segments. In this + * scenario, all items in the bucket share the same bucket-key value and the first + * segment is already full (if not, the function ebSegAddAvail() would have being + * called). This requires the creation of another segment. The layout of the + * segments before and after the addition of the new item is as follows: + * + * Before: [segHdr] -> {item1,..,item16} -> [..] + * After: [segHdr] -> {newItem} -> [nextSegHdr] -> {item1,..,item16} -> [..] + * + * Taken care to persist `segHdr` to be the same instance after the change. + * This is important because the rax tree is pointing to it. 
 */
static int ebSegAddExtended(EbucketsType *type, FirstSegHdr *firstSegHdr, eItem newItem) {
    /* Allocate nextSegHdr and let it take the items of first segment header */
    NextSegHdr *nextSegHdr = zmalloc(sizeof(NextSegHdr));
    nextSegHdr->head = firstSegHdr->head;
    /* firstSegHdr will stay the first and new nextSegHdr will follow it */
    nextSegHdr->prevSeg = (CommonSegHdr *) firstSegHdr;
    nextSegHdr->firstSeg = firstSegHdr;

    /* Walk the EB_SEG_MAX_ITEMS items that moved to nextSegHdr, to reach the
     * last item of that segment and patch its back-pointer. */
    ExpireMeta *mIter = type->getExpireMeta(nextSegHdr->head);
    mIter->firstItemBucket = 0;
    for (int i = 0 ; i < EB_SEG_MAX_ITEMS-1 ; i++)
        mIter = type->getExpireMeta(mIter->next);

    if (mIter->lastItemBucket) {
        /* Last item of the bucket cycles back to its (new) owning segment header */
        mIter->next = nextSegHdr;
    } else {
        /* Update next-next-segment to point back to next-segment */
        NextSegHdr *nextNextSegHdr = mIter->next;
        nextNextSegHdr->prevSeg = (CommonSegHdr *) nextSegHdr;
    }

    firstSegHdr->numSegs += 1;
    firstSegHdr->totalItems += 1;
    firstSegHdr->head = newItem;

    /* newItem becomes the only item of the (reused) first segment */
    ExpireMeta *mNewItem = type->getExpireMeta(newItem);
    mNewItem->numItems = 1;
    mNewItem->next = nextSegHdr;
    mNewItem->firstItemBucket = 1;
    mNewItem->lastInSegment = 1;

    return 0;
}

/* Add another eItem to a segment with available space.
   Keep items sorted in ascending order */
static int ebSegAddAvail(EbucketsType *type, FirstSegHdr *seg, eItem item) {
    eItem head = seg->head;
    ExpireMeta *nextMeta;
    ExpireMeta *mHead = type->getExpireMeta(head);
    ExpireMeta *mItem = type->getExpireMeta(item);
    uint64_t itemExpireTime = ebGetMetaExpTime(mItem);

    seg->totalItems++;

    /* Caller guarantees there is room in this segment */
    assert(mHead->numItems < EB_SEG_MAX_ITEMS);

    /* if new item expiry time is smaller than the head then add it before the head */
    if (ebGetMetaExpTime(mHead) > itemExpireTime) {
        /* Insert item as the new head (head carries the segment item count) */
        mItem->next = head;
        mItem->firstItemBucket = mHead->firstItemBucket;
        mItem->numItems = mHead->numItems + 1;
        mHead->firstItemBucket = 0;
        mHead->numItems = 0;
        seg->head = item;
        return 0;
    }

    /* Insert item in the middle of segment */
    ExpireMeta *mIter = mHead;
    for (int i = 1 ; i < mHead->numItems ; i++) {
        nextMeta = type->getExpireMeta(mIter->next);
        /* Insert item in the middle */
        if (ebGetMetaExpTime(nextMeta) > itemExpireTime) {
            mHead->numItems = mHead->numItems + 1;
            mItem->next = mIter->next;
            mIter->next = item;
            return 0;
        }
        mIter = nextMeta;
    }

    /* Insert item as the last item of the segment. Inherit flags from previous last item */
    mHead->numItems = mHead->numItems + 1;
    mItem->next = mIter->next;
    mItem->lastInSegment = mIter->lastInSegment;
    mItem->lastItemBucket = mIter->lastItemBucket;
    mIter->lastInSegment = 0;
    mIter->lastItemBucket = 0;
    mIter->next = item;
    return 0;
}

/* Return 1 if split segment to two succeeded. Else, return 0.
The only reason + * the split can fail is that All the items in the segment have the same bucket-key */ +static int ebTrySegSplit(EbucketsType *type, FirstSegHdr *seg, EBucketNew *newBucket) { + int minMidDist=(EB_SEG_MAX_ITEMS / 2), bestMiddleIndex = -1; + uint64_t splitKey = -1; + eItem firstItemSecondPart; + ExpireMeta *mLastItemFirstPart, *mFirstItemSecondPart; + + eItem head = seg->head; + ExpireMeta *mHead = type->getExpireMeta(head); + ExpireMeta *mNext, *mIter = mHead; + + /* Search for best middle index to split the segment into two segments. As the + * items are arranged in ascending order, it cannot split between two items that + * have the same expiration time and therefore the split won't necessarily be + * balanced (Or won't be possible to split at all if all have the same exp-time!) + */ + for (int i = 0 ; i < EB_SEG_MAX_ITEMS-1 ; i++) { + //printf ("i=%d\n", i); + mNext = type->getExpireMeta(mIter->next); + if (EB_BUCKET_KEY(ebGetMetaExpTime(mNext)) > EB_BUCKET_KEY( + ebGetMetaExpTime(mIter))) { + /* If found better middle index before reaching halfway, save it */ + if (i < (EB_SEG_MAX_ITEMS/2)) { + splitKey = EB_BUCKET_KEY(ebGetMetaExpTime(mNext)); + bestMiddleIndex = i; + mLastItemFirstPart = mIter; + mFirstItemSecondPart = mNext; + firstItemSecondPart = mIter->next; + minMidDist = (EB_SEG_MAX_ITEMS / 2) - bestMiddleIndex; + } else { + /* after crossing the middle need only to look for the first diff */ + if (minMidDist > (i + 1 - EB_SEG_MAX_ITEMS / 2)) { + splitKey = EB_BUCKET_KEY(ebGetMetaExpTime(mNext)); + bestMiddleIndex = i; + mLastItemFirstPart = mIter; + mFirstItemSecondPart = mNext; + firstItemSecondPart = mIter->next; + minMidDist = i + 1 - EB_SEG_MAX_ITEMS / 2; + } + } + } + mIter = mNext; + } + + /* If cannot find index to split because all with same EB_BUCKET_KEY(), then + * segment should be treated as extended segment */ + if (bestMiddleIndex == -1) + return 0; + + /* New bucket */ + newBucket->segment.head = firstItemSecondPart; + 
newBucket->segment.numSegs = 1; + newBucket->segment.totalItems = EB_SEG_MAX_ITEMS - bestMiddleIndex - 1; + mFirstItemSecondPart->numItems = EB_SEG_MAX_ITEMS - bestMiddleIndex - 1; + newBucket->mLast = mIter; + newBucket->ebKey = splitKey; + mIter->lastInSegment = 1; + mIter->lastItemBucket = 1; + mIter->next = &newBucket->segment; /* to be updated by caller */ + mFirstItemSecondPart->firstItemBucket = 1; + + /* update existing bucket */ + seg->totalItems = bestMiddleIndex + 1; + mHead->numItems = bestMiddleIndex + 1; + mLastItemFirstPart->lastInSegment = 1; + mLastItemFirstPart->lastItemBucket = 1; + mLastItemFirstPart->next = seg; + return 1; +} + +/* Return 1 if managed to expire the entire segment. Returns 0 otherwise. */ +int ebSingleSegExpire(FirstSegHdr *firstSegHdr, + EbucketsType *type, + ExpireInfo *info, + eItem *updateList) +{ + uint64_t itemExpTime; + eItem iter = firstSegHdr->head; + ExpireMeta *mIter = type->getExpireMeta(iter); + uint32_t i=0, numItemsInSeg = mIter->numItems; + + while (info->itemsExpired < info->maxToExpire) { + itemExpTime = ebGetMetaExpTime(mIter); + + /* Items are arranged in ascending expire-time order in a segment. Stops + * active expiration when an item's expire time is greater than `now`. */ + if (itemExpTime > info->now) + break; + + /* keep aside next before deletion of iter */ + eItem next = mIter->next; + mIter->trash = 1; + ExpireAction act = info->onExpireItem(iter, info->ctx); + + /* if (act == ACT_REMOVE_EXP_ITEM) + * then don't touch the item. Assume it got deleted */ + + /* If indicated to stop then break (cb didn't delete the item) */ + if (act == ACT_STOP_ACTIVE_EXP) { + mIter->trash = 0; + break; + } + + /* If indicated to re-insert the item, then chain it to updateList. 
+ * it will be ebAdd() back to ebuckets at the end of ebExpire() */ + if (act == ACT_UPDATE_EXP_ITEM) { + mIter->next = *updateList; + *updateList = iter; + } + + ++info->itemsExpired; + + /* if deleted all items in segment, delete header and return */ + if (++i == numItemsInSeg) { + zfree(firstSegHdr); + return 1; + } + + /* More items in the segment. Set iter to next item and update mIter */ + iter = next; + mIter = type->getExpireMeta(iter); + } + + /* Update the single-segment with remaining items */ + mIter->numItems = numItemsInSeg - i; + mIter->firstItemBucket = 1; + firstSegHdr->head = iter; + firstSegHdr->totalItems -= i; + + /* Update nextExpireTime */ + info->nextExpireTime = ebGetMetaExpTime(mIter); + + return 0; +} + +/* return 1 if managed to expire the entire segment. Returns 0 otherwise. */ +static int ebSegExpire(FirstSegHdr *firstSegHdr, + EbucketsType *type, + ExpireInfo *info, + eItem *updateList) +{ + eItem iter = firstSegHdr->head; + uint32_t numSegs = firstSegHdr->numSegs; + void *nextSegHdr = firstSegHdr; + + if (numSegs == 1) + return ebSingleSegExpire(firstSegHdr, type, info, updateList); + + /* + * In an extended-segment, there's no need to verify the expiration time of + * each item. This is because all items in an extended-segment share the same + * bucket-key. Therefore, we can remove all items without checking their + * individual expiration times. This is different from a single-segment + * scenario, where items can have different bucket-keys. 
+ */ + for (uint32_t seg=0 ; seg < numSegs ; seg++) { + uint32_t i; + ExpireMeta *mIter = type->getExpireMeta(iter); + uint32_t numItemsInSeg = mIter->numItems; + + for (i = 0; (i < numItemsInSeg) && (info->itemsExpired < info->maxToExpire) ; ++i) { + mIter = type->getExpireMeta(iter); + + /* keep aside `next` before removing `iter` by onExpireItem */ + eItem next = mIter->next; + mIter->trash = 1; + ExpireAction act = info->onExpireItem(iter, info->ctx); + + /* if (act == ACT_REMOVE_EXP_ITEM) + * then don't touch the item. Assume it got deleted */ + + /* If indicated to stop then break (callback didn't delete the item) */ + if (act == ACT_STOP_ACTIVE_EXP) { + mIter->trash = 0; + break; + } + + /* If indicated to re-insert the item, then chain it to updateList. + * it will be ebAdd() back to ebuckets at the end of ebExpire() */ + if (act == ACT_UPDATE_EXP_ITEM) { + mIter->next = *updateList; + *updateList = iter; + } + + /* Item was REMOVED/UPDATED. Advance to `next` item */ + iter = next; + ++info->itemsExpired; + firstSegHdr->totalItems -= 1; + } + + /* if deleted all items in segment */ + if (i == numItemsInSeg) { + /* If not last segment in bucket, then delete segment header */ + if (seg + 1 < numSegs) { + nextSegHdr = iter; + iter = ((NextSegHdr *) nextSegHdr)->head; + zfree(nextSegHdr); + firstSegHdr->numSegs -= 1; + firstSegHdr->head = iter; + mIter = type->getExpireMeta(iter); + mIter->firstItemBucket = 1; + } + } else { + /* We reached here because for-loop above break due to + * ACT_STOP_ACTIVE_EXP or reached maxToExpire */ + firstSegHdr->head = iter; + mIter = type->getExpireMeta(iter); + mIter->numItems = numItemsInSeg - i; + mIter->firstItemBucket = 1; + info->nextExpireTime = ebGetMetaExpTime(mIter); + + /* If deleted one or more segments, update prevSeg of next seg to point firstSegHdr. 
+ * If it is the last segment, then last item need to point firstSegHdr */ + if (seg>0) { + int numItems = mIter->numItems; + for (int i = 0; i < numItems - 1; i++) + mIter = type->getExpireMeta(mIter->next); + + if (mIter->lastItemBucket) { + mIter->next = firstSegHdr; + } else { + /* Update next-segment to point back to firstSegHdr */ + NextSegHdr *nsh = mIter->next; + nsh->prevSeg = (CommonSegHdr *) firstSegHdr; + } + } + + return 0; + } + } + + /* deleted last segment in bucket */ + zfree(firstSegHdr); + return 1; +} + +/*** Static functions of list ***/ + +/* Convert a list to rax. + * + * To create a new rax, the function first converts the list to a segment by + * allocating a segment header and attaching to it the already existing list. + * Then, it adds the new segment to the rax as the first bucket. */ +static rax *ebConvertListToRax(eItem listHead, EbucketsType *type) { + FirstSegHdr *firstSegHdr = zmalloc(sizeof(FirstSegHdr)); + firstSegHdr->head = listHead; + firstSegHdr->totalItems = EB_LIST_MAX_ITEMS ; + firstSegHdr->numSegs = 1; + + /* update last item to point on the segment header */ + ExpireMeta *metaItem = type->getExpireMeta(listHead); + uint64_t bucketKey = EB_BUCKET_KEY(ebGetMetaExpTime(metaItem)); + while (metaItem->lastItemBucket == 0) + metaItem = type->getExpireMeta(metaItem->next); + metaItem->next = firstSegHdr; + + /* Use min expire-time for the first segment in rax */ + unsigned char raxKey[EB_KEY_SIZE]; + bucketKey2RaxKey(bucketKey, raxKey); + rax *rax = raxNewWithMetadata(sizeof(uint64_t)); + *ebRaxNumItems(rax) = EB_LIST_MAX_ITEMS; + raxInsert(rax, raxKey, EB_KEY_SIZE, firstSegHdr, NULL); + return rax; +} + +/** + * Adds another 'item' to the ebucket of type list, keeping the list sorted by + * ascending expiration time. + * + * @param eb - Pointer to the ebuckets handler of type list. Gets updated if the item is + * added as the new head. + * @param type - Pointer to the EbucketsType structure defining the type of ebucket. 
 * @param item - The eItem to be added to the list.
 *
 * @return 1 if the maximum list length is reached; otherwise, return 0.
 */
static int ebAddToList(ebuckets *eb, EbucketsType *type, eItem item) {
    ExpireMeta *metaItem = type->getExpireMeta(item);

    /* if ebucket-list is empty (NULL), then create a new list by marking 'item'
     * as the head and tail of the list */
    if (unlikely(ebIsEmpty(*eb))) {
        metaItem->next = NULL;
        metaItem->numItems = 1;
        metaItem->lastInSegment = 1;
        metaItem->firstItemBucket = 1;
        metaItem->lastItemBucket = 1;
        *eb = ebMarkAsList(item);
        return 0;
    }

    eItem head = ebGetListPtr(type, *eb);
    ExpireMeta *metaHead = type->getExpireMeta(head);

    /* If reached max items in list, then return 1 (caller converts to rax) */
    if (metaHead->numItems == EB_LIST_MAX_ITEMS)
        return 1;

    /* if expiry time of 'item' is smaller than the head then add it as the new head */
    if (ebGetMetaExpTime(metaHead) > ebGetMetaExpTime(metaItem)) {
        /* Insert item as the new head (head carries the list item count) */
        metaItem->next = head;
        metaItem->firstItemBucket = 1;
        metaItem->numItems = metaHead->numItems + 1;
        metaHead->firstItemBucket = 0;
        metaHead->numItems = 0;
        *eb = ebMarkAsList(item);
        return 0;
    }

    /* Try insert item in the middle of list */
    ExpireMeta *mIter = metaHead;
    for (int i = 1 ; i < metaHead->numItems ; i++) {
        ExpireMeta *nextMeta = type->getExpireMeta(mIter->next);
        /* Insert item in the middle */
        if (ebGetMetaExpTime(nextMeta) > ebGetMetaExpTime(metaItem)) {
            metaHead->numItems += 1;
            metaItem->next = mIter->next;
            mIter->next = item;
            return 0;
        }
        mIter = nextMeta;
    }

    /* Insert item as the last item of the list. */
    metaHead->numItems += 1;
    metaItem->next = NULL;
    metaItem->lastInSegment = 1;
    metaItem->lastItemBucket = 1;
    /* Update obsolete last item */
    mIter->lastInSegment = 0;
    mIter->lastItemBucket = 0;
    mIter->next = item;
    return 0;
}

/* return 1 if removed from list.
Otherwise, return 0 */ +static int ebRemoveFromList(ebuckets *eb, EbucketsType *type, eItem item) { + if (ebIsEmpty(*eb)) + return 0; /* not removed */ + + ExpireMeta *metaItem = type->getExpireMeta(item); + eItem head = ebGetListPtr(type, *eb); + + /* if item is the head of the list */ + if (head == item) { + eItem newHead = metaItem->next; + if (newHead != NULL) { + ExpireMeta *mNewHead = type->getExpireMeta(newHead); + mNewHead->numItems = metaItem->numItems - 1; + mNewHead->firstItemBucket = 1; + *eb = ebMarkAsList(newHead); + return 1; /* removed */ + } + *eb = NULL; + return 1; /* removed */ + } + + /* item is not the head of the list */ + ExpireMeta *metaHead = type->getExpireMeta(head); + + eItem iter = head; + while (iter != NULL) { + ExpireMeta *metaIter = type->getExpireMeta(iter); + if (metaIter->next == item) { + metaIter->next = metaItem->next; + /* If deleted item is the last in the list, then update new last item */ + if (metaItem->next == NULL) { + metaIter->lastInSegment = 1; + metaIter->lastItemBucket = 1; + } + metaHead->numItems -= 1; + return 1; /* removed */ + } + iter = metaIter->next; + } + return 0; /* not removed */ +} + +/* return 1 if none left. Otherwise return 0 */ +static int ebListExpire(ebuckets *eb, + EbucketsType *type, + ExpireInfo *info, + eItem *updateList) +{ + uint32_t expired = 0; + eItem item = ebGetListPtr(type, *eb); + ExpireMeta *metaItem = type->getExpireMeta(item); + uint32_t numItems = metaItem->numItems; /* first item must exists */ + + while (item != NULL) { + metaItem = type->getExpireMeta(item); + uint64_t itemExpTime = ebGetMetaExpTime(metaItem); + + /* Items are arranged in ascending expire-time order in a list. Stops list + * active expiration when an item's expiration time is greater than `now`. 
*/ + if (itemExpTime > info->now) + break; + + if (info->itemsExpired == info->maxToExpire) + break; + + /* keep aside `next` before removing `iter` by onExpireItem */ + eItem *next = metaItem->next; + metaItem->trash = 1; + ExpireAction act = info->onExpireItem(item, info->ctx); + + /* if (act == ACT_REMOVE_EXP_ITEM) + * then don't touch the item. Assume it got deleted */ + + /* If indicated to stop then break (cb didn't delete the item) */ + if (act == ACT_STOP_ACTIVE_EXP) { + metaItem->trash = 0; + break; + } + + /* If indicated to re-insert the item, then chain it to updateList. + * it will be ebAdd() back to ebuckets at the end of ebExpire() */ + if (act == ACT_UPDATE_EXP_ITEM) { + metaItem->next = *updateList; + *updateList = item; + } + + ++expired; + ++(info->itemsExpired); + item = next; + } + + if (expired == numItems) { + *eb = NULL; + info->nextExpireTime = EB_EXPIRE_TIME_INVALID; + return 1; + } + + metaItem->numItems = numItems - expired; + metaItem->firstItemBucket = 1; + info->nextExpireTime = ebGetMetaExpTime(metaItem); + *eb = ebMarkAsList(item); + return 0; +} + +/* Validate the general structure of the list */ +static void ebValidateList(eItem head, EbucketsType *type) { + if (head == NULL) + return; + + ExpireMeta *mHead = type->getExpireMeta(head); + eItem iter = head; + ExpireMeta *mIter = type->getExpireMeta(iter), *mIterPrev = NULL; + + for (int i = 0; i < mHead->numItems ; ++i) { + mIter = type->getExpireMeta(iter); + if (i == 0) { + /* first item */ + assert(mIter->numItems > 0 && mIter->numItems <= EB_LIST_MAX_ITEMS); + assert(mIter->firstItemBucket == 1); + } else { + /* Verify that expire time of previous item is smaller or equal */ + assert(ebGetMetaExpTime(mIterPrev) <= ebGetMetaExpTime(mIter)); + assert(mIter->numItems == 0); + assert(mIter->firstItemBucket == 0); + } + + if (i == (mHead->numItems - 1)) { + /* last item */ + assert(mIter->lastInSegment == 1); + assert(mIter->lastItemBucket == 1); + assert(mIter->next == NULL); + } 
else { + assert(mIter->lastInSegment == 0); + assert(mIter->lastItemBucket == 0); + assert(mIter->next != NULL); + mIterPrev = mIter; + iter = mIter->next; + } + } +} + +/*** Static functions of ebuckets / rax ***/ + +static uint64_t *ebRaxNumItems(rax *rax) { + return (uint64_t*) rax->metadata; +} + +/* Allocate a single segment with a single item */ +static void ebNewBucket(EbucketsType *type, EBucketNew *newBucket, eItem item, uint64_t key) { + ExpireMeta *mItem = type->getExpireMeta(item); + + newBucket->segment.head = item; + newBucket->segment.totalItems = 1; + newBucket->segment.numSegs = 1; + newBucket->mLast = type->getExpireMeta(item); + newBucket->ebKey = key; + mItem->numItems = 1; + mItem->firstItemBucket = 1; + mItem->lastInSegment = 1; + mItem->lastItemBucket = 1; + mItem->next = &newBucket->segment; +} + +/* + * ebBucketPrint - Prints all the segments in the bucket and time expiration + * of each item in the following fashion: + * + * Bucket(tot=0008,sgs=0001) : [11, 21, 26, 27, 29, 49, 59, 62] + * Bucket(tot=0007,sgs=0001) : [67, 86, 90, 92, 115, 123, 126] + * Bucket(tot=0005,sgs=0001) : [130, 135, 135, 136, 140] + * Bucket(tot=0009,sgs=0002) : [182] + * [162, 163, 167, 168, 172, 177, 183, 186] + * Bucket(tot=0001,sgs=0001) : [193] + */ +static int ebBucketPrint(uint64_t bucketKey, EbucketsType *type, FirstSegHdr *firstSeg) { + eItem iter; + ExpireMeta *mIter, *mHead; + static int PRINT_EXPIRE_META_FLAGS=0; + + iter = firstSeg->head; + mHead = type->getExpireMeta(iter); + + printf("Bucket(key=%06" PRIu64 ",tot=%04d,sgs=%04d) :", bucketKey, firstSeg->totalItems, firstSeg->numSegs); + while (1) { + mIter = type->getExpireMeta(iter); /* not really needed. 
Just to hush the compiler */
+        printf(" [");
+        for (int i = 0; i < mHead->numItems ; ++i) {
+            mIter = type->getExpireMeta(iter);
+            uint64_t expireTime = ebGetMetaExpTime(mIter);
+
+            if (i == 0 && PRINT_EXPIRE_META_FLAGS)
+                printf("%" PRIu64 "<%u,%u,%u,%u>, ",
+                       expireTime, mIter->numItems, mIter->firstItemBucket,
+                       mIter->lastInSegment, mIter->lastItemBucket);
+            else if (i == (mHead->numItems - 1) && PRINT_EXPIRE_META_FLAGS) {
+                printf("%" PRIu64 "<%u,%u,%u,%u>",
+                       expireTime, mIter->numItems, mIter->firstItemBucket,
+                       mIter->lastInSegment, mIter->lastItemBucket);
+            } else
+                printf("%" PRIu64 "%s", expireTime, (i == mHead->numItems - 1) ? "" : ", ");
+
+            iter = mIter->next;
+        }
+
+        if (mIter->lastItemBucket) {
+            printf("]\n");
+            break;
+        }
+        printf("]\n ");
+        iter = ((NextSegHdr *) mIter->next)->head;
+        mHead = type->getExpireMeta(iter);
+
+    }
+    return 0;
+}
+
+/* Add another eItem to bucket. If needed return 'newBucket' for insertion in rax tree.
+ *
+ * 1) If the bucket is based on a single, not full segment, then add the item to the segment.
+ * 2) If a single, full segment, then try to split it and then add the item.
+ * 3) If failed to split, then all items in the bucket have the same bucket-key.
+ *    - If the new item has the same bucket-key, then extend the segment to
+ *      be an extended-segment, if not already, and add the item to it.
+ *    - If the new item has a different bucket-key, then allocate a new bucket
+ *      for it.
+ */ +static int ebAddToBucket(EbucketsType *type, + FirstSegHdr *firstSegBkt, + eItem item, + EBucketNew *newBucket, + uint64_t *updateBucketKey) +{ + newBucket->segment.head = NULL; /* no new bucket as default */ + + if (firstSegBkt->numSegs == 1) { + /* If bucket is a single, not full segment, then add the item to the segment */ + if (firstSegBkt->totalItems < EB_SEG_MAX_ITEMS) + return ebSegAddAvail(type, firstSegBkt, item); + + /* If bucket is a single, full segment, and segment split succeeded */ + if (ebTrySegSplit(type, firstSegBkt, newBucket) == 1) { + /* The split got failed only because all items in the segment have the + * same bucket-key */ + ExpireMeta *mItem = type->getExpireMeta(item); + + /* Check which of the two segments the new item should be added to. Note that + * after the split, bucket-key of `newBucket` is bigger than bucket-key of + * `firstSegBkt`. That is `firstSegBkt` preserves its bucket-key value + * (and its location in rax tree) before the split */ + if (EB_BUCKET_KEY(ebGetMetaExpTime(type->getExpireMeta(item))) < newBucket->ebKey) { + return ebSegAddAvail(type, firstSegBkt, item); + } else { + /* Add the `item` to the new bucket */ + ebSegAddAvail(type, &(newBucket->segment), item); + + /* if new item is now last item in the segment, then update lastItemBucket */ + if (mItem->lastItemBucket) + newBucket->mLast = mItem; + return 0; + } + } + } + + /* If reached here, then either: + * (1) a bucket with multiple segments + * (2) Or, a single, full segment which failed to split. + * + * Either way, all items in the bucket have the same bucket-key value. Thus: + * (A) If 'item' has the same bucket-key as the ones in this bucket, then add it as well + * (B) Else, allocate a new bucket for it. 
+ */ + + ExpireMeta *mHead = type->getExpireMeta(firstSegBkt->head); + ExpireMeta *mItem = type->getExpireMeta(item); + + uint64_t bucketKey = EB_BUCKET_KEY(ebGetMetaExpTime(mHead)); /* same for all items in the segment */ + uint64_t itemKey = EB_BUCKET_KEY(ebGetMetaExpTime(mItem)); + + if (bucketKey == itemKey) { + /* New item has the same bucket-key as the ones in this bucket, Add it as well */ + if (mHead->numItems < EB_SEG_MAX_ITEMS) + return ebSegAddAvail(type, firstSegBkt, item); /* Add item to first segment */ + else { + /* If a regular segment becomes extended-segment, then update the + * bucket-key to be aligned with the expiration-time of the items + * it contains */ + if (firstSegBkt->numSegs == 1) + *updateBucketKey = bucketKey; + + return ebSegAddExtended(type, firstSegBkt, item); /* Add item in a new segment */ + } + } else { + /* If the item cannot be added to the visited (extended-segment) bucket + * because it has a key not equal to bucket-key, then need to allocate a new + * bucket for the item. If the key of the item is below the bucket-key of + * the visited bucket, then the new item will be added to a new segment + * before it and the visited bucket key will be updated to accurately + * reflect the bucket-key of the (extended-segment) bucket */ + if (bucketKey > itemKey) + *updateBucketKey = bucketKey; + + ebNewBucket(type, newBucket, item, EB_BUCKET_KEY(ebGetMetaExpTime(mItem))); + return 0; + } +} + +/* + * Remove item from rax + * + * Return 1 if removed. Otherwise, return 0 + * + * Note: The function is optimized to remove items locally from segments without + * traversing rax tree or stepping long extended-segments. Therefore, it is + * assumed that the item is present in the bucket without verification. + * + * TODO: Written straightforward. Should be optimized to merge small segments. 
+ */ +static int ebRemoveFromRax(ebuckets *eb, EbucketsType *type, eItem item) { + ExpireMeta *mItem = type->getExpireMeta(item); + rax *rax = ebGetRaxPtr(*eb); + + /* if item is the only one left in a single-segment bucket, then delete bucket */ + if (unlikely(mItem->firstItemBucket && mItem->lastItemBucket)) { + raxIterator ri; + raxStart(&ri, rax); + unsigned char raxKey[EB_KEY_SIZE]; + bucketKey2RaxKey(EB_BUCKET_KEY(ebGetMetaExpTime(mItem)), raxKey); + raxSeek(&ri, "<=", raxKey, EB_KEY_SIZE); + + if (raxNext(&ri) == 0) + return 0; /* not removed */ + + FirstSegHdr *segHdr = ri.data; + + if (segHdr->head != item) + return 0; /* not removed */ + + zfree(segHdr); + raxRemove(ri.rt, ri.key, EB_KEY_SIZE, NULL); + raxStop(&ri); + + /* If last bucket in rax, then delete the rax */ + if (rax->numele == 0) { + raxFree(rax); + *eb = NULL; + return 1; /* removed */ + } + } else if (mItem->numItems == 1) { + /* If the `item` is the only one in its segment, there must be additional + * items and segments in this bucket. If there weren't, the item would + * have been removed by the previous condition. */ + + if (mItem->firstItemBucket) { + /* If the first item/segment in extended-segments, then + * - Remove current segment (with single item) and promote next-segment to be first. 
+ * - Update first item of next-segment to be firstItemBucket + * - Update `prevSeg` next-of-next segment to point new header of next-segment + * - Update FirstSegHdr to totalItems-1, numSegs-1 */ + NextSegHdr *nextHdr = mItem->next; + FirstSegHdr *firstHdr = (FirstSegHdr *) nextHdr->prevSeg; + firstHdr->head = nextHdr->head; + firstHdr->totalItems--; + firstHdr->numSegs--; + zfree(nextHdr); + eItem *iter = firstHdr->head; + ExpireMeta *mIter = type->getExpireMeta(iter); + mIter->firstItemBucket = 1; + while (mIter->lastInSegment == 0) { + iter = mIter->next; + mIter = type->getExpireMeta(iter); + } + if (mIter->lastItemBucket) + mIter->next = firstHdr; + else + ((NextSegHdr *) mIter->next)->prevSeg = (CommonSegHdr *) firstHdr; + + } else if (mItem->lastItemBucket) { + /* If last item/segment in bucket, then + * - promote previous segment to be last segment + * - Update FirstSegHdr to totalItems-1, numSegs-1 */ + NextSegHdr *currHdr = mItem->next; + CommonSegHdr *prevHdr = currHdr->prevSeg; + eItem iter = prevHdr->head; + ExpireMeta *mIter = type->getExpireMeta(iter); + while (mIter->lastInSegment == 0) { + iter = mIter->next; + mIter = type->getExpireMeta(iter); + } + currHdr->firstSeg->totalItems--; + currHdr->firstSeg->numSegs--; + mIter->next = prevHdr; + mIter->lastItemBucket = 1; + zfree(currHdr); + + } else { + /* item/segment is not the first or last item/segment. + * - Update previous segment to point next segment. 
+ * - Update `prevSeg` of next segment + * - Update FirstSegHdr to totalItems-1, numSegs-1 */ + NextSegHdr *nextHdr = mItem->next; + NextSegHdr *currHdr = (NextSegHdr *) nextHdr->prevSeg; + CommonSegHdr *prevHdr = currHdr->prevSeg; + + ExpireMeta *mIter = type->getExpireMeta(prevHdr->head); + while (mIter->lastInSegment == 0) + mIter = type->getExpireMeta(mIter->next); + + mIter->next = nextHdr; + nextHdr->prevSeg = prevHdr; + nextHdr->firstSeg->totalItems--; + nextHdr->firstSeg->numSegs--; + zfree(currHdr); + + } + } else { + /* At least 2 items in current segment */ + if (mItem->numItems) { + /* If item is first item in segment (Must be numItems>1), then + * - Find segment header and update to point next item. + * - Let next inherit 'item' flags {firstItemBucket, numItems-1} + * - Update FirstSegHdr to totalItems-1 */ + ExpireMeta *mIter = mItem; + CommonSegHdr *currHdr; + while (mIter->lastInSegment == 0) + mIter = type->getExpireMeta(mIter->next); + if (mIter->lastItemBucket) + currHdr = (CommonSegHdr *) mIter->next; + else + currHdr = (CommonSegHdr *) ((NextSegHdr *) mIter->next)->prevSeg; + + if (mItem->firstItemBucket) + ((FirstSegHdr *) currHdr)->totalItems--; + else + ((NextSegHdr *) currHdr)->firstSeg->totalItems--; + + eItem *newHead = mItem->next; + ExpireMeta *mNewHead = type->getExpireMeta(newHead); + mNewHead->firstItemBucket = mItem->firstItemBucket; + mNewHead->numItems = mItem->numItems - 1; + currHdr->head = newHead; + + } else if (mItem->lastInSegment) { + /* If item is last in segment, then + * - find previous item and let it inherit (next, lastInSegment, lastItemBucket) + * - Find and update segment header to numItems-1 + * - Update FirstSegHdr to totalItems-1 */ + CommonSegHdr *currHdr; + if (mItem->lastItemBucket) + currHdr = (CommonSegHdr *) mItem->next; + else + currHdr = (CommonSegHdr *) ((NextSegHdr *) mItem->next)->prevSeg; + + ExpireMeta *mHead = type->getExpireMeta(currHdr->head); + mHead->numItems--; + ExpireMeta *mIter = mHead; + 
while (mIter->next != item) + mIter = type->getExpireMeta(mIter->next); + + mIter->next = mItem->next; + mIter->lastInSegment = mItem->lastInSegment; + mIter->lastItemBucket = mItem->lastItemBucket; + + if (mHead->firstItemBucket) + ((FirstSegHdr *) currHdr)->totalItems--; + else + ((NextSegHdr *) currHdr)->firstSeg->totalItems--; + + } else { + /* - Item is in the middle of segment. Find previous item and update to point next. + * - Find and Update segment header to numItems-1 + * - Update FirstSegHdr to totalItems-1 */ + ExpireMeta *mIter = mItem; + CommonSegHdr *currHdr; + while (mIter->lastInSegment == 0) + mIter = type->getExpireMeta(mIter->next); + if (mIter->lastItemBucket) + currHdr = (CommonSegHdr *) mIter->next; + else + currHdr = (CommonSegHdr *) ((NextSegHdr *) mIter->next)->prevSeg; + + ExpireMeta *mHead = type->getExpireMeta(currHdr->head); + mHead->numItems--; + mIter = mHead; + while (mIter->next != item) + mIter = type->getExpireMeta(mIter->next); + + mIter->next = mItem->next; + mIter->lastInSegment = mItem->lastInSegment; + mIter->lastItemBucket = mItem->lastItemBucket; + + if (mHead->firstItemBucket) + ((FirstSegHdr *) currHdr)->totalItems--; + else + ((NextSegHdr *) currHdr)->firstSeg->totalItems--; + } + } + *ebRaxNumItems(rax) -= 1; + return 1; /* removed */ +} + +int ebAddToRax(ebuckets *eb, EbucketsType *type, eItem item, uint64_t bucketKeyItem) { + EBucketNew newBucket; /* ebAddToBucket takes care to update newBucket.segment.head */ + raxIterator iter; + unsigned char raxKey[EB_KEY_SIZE]; + bucketKey2RaxKey(bucketKeyItem, raxKey); + rax *rax = ebGetRaxPtr(*eb); + raxStart(&iter,rax); + raxSeek(&iter, "<=", raxKey, EB_KEY_SIZE); + *ebRaxNumItems(rax) += 1; + /* If expireTime of the item is below the bucket-key of first bucket in rax, + * then need to add it as a new bucket at the beginning of the rax. 
*/ + if(raxNext(&iter) == 0) { + FirstSegHdr *firstSegHdr = zmalloc(sizeof(FirstSegHdr)); + firstSegHdr->head = item; + firstSegHdr->totalItems = 1; + firstSegHdr->numSegs = 1; + + /* update last item to point on the segment header */ + ExpireMeta *metaItem = type->getExpireMeta(item); + metaItem->lastItemBucket = 1; + metaItem->lastInSegment = 1; + metaItem->firstItemBucket = 1; + metaItem->numItems = 1; + metaItem->next = firstSegHdr; + bucketKey2RaxKey(bucketKeyItem, raxKey); + raxInsert(rax, raxKey, EB_KEY_SIZE, firstSegHdr, NULL); + raxStop(&iter); + return 0; + } + + /* Add the new item into the first segment of the bucket that we found */ + uint64_t updateBucketKey = 0; + ebAddToBucket(type, iter.data, item, &newBucket, &updateBucketKey); + + /* If following the addition need to `updateBucketKey` of `foundBucket` in rax */ + if(unlikely(updateBucketKey && updateBucketKey != raxKey2BucketKey(iter.key))) { + raxRemove(iter.rt, iter.key, EB_KEY_SIZE, NULL); + bucketKey2RaxKey(updateBucketKey, raxKey); + raxInsert(iter.rt, raxKey, EB_KEY_SIZE, iter.data, NULL); + } + + /* If ebAddToBucket() returned a new bucket, then add the bucket to rax. + * + * This might happen when trying to add another item to a bucket that is: + * 1. A single, full segment. Will result in a bucket (segment) split. + * 2. Extended segment with a different bucket-key than the new item. + * Will result in a new bucket (of size 1) for the new item. 
+ */ + if (newBucket.segment.head != NULL) { + /* Allocate segment header for the new bucket */ + FirstSegHdr *newSeg = zmalloc(sizeof(FirstSegHdr)); + /* Move the segment from 'newBucket' to allocated segment header */ + *newSeg = newBucket.segment; + /* Update 'next' of last item in segment to point to 'FirstSegHdr` */ + newBucket.mLast->next = newSeg; + /* Insert the new bucket to rax */ + bucketKey2RaxKey(newBucket.ebKey, raxKey); + raxInsert(iter.rt, raxKey, EB_KEY_SIZE, newSeg, NULL); + } + + raxStop(&iter); + return 0; +} + +/* Validate the general structure of the buckets in rax */ +static void ebValidateRax(rax *rax, EbucketsType *type) { + uint64_t numItemsTotal = 0; + raxIterator raxIter; + raxStart(&raxIter, rax); + raxSeek(&raxIter, "^", NULL, 0); + while (raxNext(&raxIter)) { + int expectFirstItemBucket = 1; + FirstSegHdr *firstSegHdr = raxIter.data; + eItem iter; + ExpireMeta *mIter, *mHead; + iter = firstSegHdr->head; + mHead = type->getExpireMeta(iter); + uint64_t numItemsBucket = 0, countSegments = 0; + + int extendedSeg = (firstSegHdr->numSegs > 1) ? 
1 : 0; + void *segHdr = firstSegHdr; + + mIter = type->getExpireMeta(iter); + while (1) { + uint64_t curBktKey, prevBktKey; + for (int i = 0; i < mHead->numItems ; ++i) { + assert(iter != NULL); + mIter = type->getExpireMeta(iter); + curBktKey = EB_BUCKET_KEY(ebGetMetaExpTime(mIter)); + + if (i == 0) { + assert(mIter->numItems > 0 && mIter->numItems <= EB_SEG_MAX_ITEMS); + assert(mIter->firstItemBucket == expectFirstItemBucket); + expectFirstItemBucket = 0; + prevBktKey = curBktKey; + } else { + assert( (extendedSeg && prevBktKey == curBktKey) || + (!extendedSeg && prevBktKey <= curBktKey) ); + assert(mIter->numItems == 0); + assert(mIter->firstItemBucket == 0); + prevBktKey = curBktKey; + } + + if (i == mHead->numItems - 1) + assert(mIter->lastInSegment == 1); + else + assert(mIter->lastInSegment == 0); + + iter = mIter->next; + } + + numItemsBucket += mHead->numItems; + countSegments += 1; + + if (mIter->lastItemBucket) + break; + + NextSegHdr *nextSegHdr = mIter->next; + assert(nextSegHdr->firstSeg == firstSegHdr); + assert(nextSegHdr->prevSeg == segHdr); + iter = nextSegHdr->head; + mHead = type->getExpireMeta(iter); + segHdr = nextSegHdr; + } + /* Verify next of last item, `totalItems` and `numSegs` in iterated bucket */ + assert(mIter->next == segHdr); + assert(numItemsBucket == firstSegHdr->totalItems); + assert(countSegments == firstSegHdr->numSegs); + numItemsTotal += numItemsBucket; + } + raxStop(&raxIter); + assert(numItemsTotal == *ebRaxNumItems(rax)); +} + +struct deleteCbCtx { EbucketsType *type; void *userCtx; }; +void ebRaxDeleteCb(void *item, void *context) { + struct deleteCbCtx *ctx = context; + FirstSegHdr *firstSegHdr = item; + eItem itemIter = firstSegHdr->head; + uint32_t numSegs = firstSegHdr->numSegs; + void *nextSegHdr = firstSegHdr; + + for (uint32_t seg=0 ; seg < numSegs ; seg++) { + zfree(nextSegHdr); + + ExpireMeta *mIter = ctx->type->getExpireMeta(itemIter); + uint32_t numItemsInSeg = mIter->numItems; + + for (uint32_t i = 0; i < 
numItemsInSeg ; ++i) { + mIter = ctx->type->getExpireMeta(itemIter); + eItem toDelete = itemIter; + mIter->trash = 1; + itemIter = mIter->next; + if (ctx->type->onDeleteItem) ctx->type->onDeleteItem(toDelete, &ctx->userCtx); + } + nextSegHdr = itemIter; + + if (seg + 1 < numSegs) + itemIter = ((NextSegHdr *) nextSegHdr)->head; + } + +} + +static void _ebPrint(ebuckets eb, EbucketsType *type, int64_t usedMem, int printItems) { + if (ebIsEmpty(eb)) { + printf("Empty ebuckets\n"); + return; + } + + if (ebIsList(eb)) { + /* mock rax segment */ + eItem head = ebGetListPtr(type, eb); + ExpireMeta *metaHead = type->getExpireMeta(head); + FirstSegHdr mockSeg = { head, metaHead->numItems, 1}; + if (printItems) + ebBucketPrint(0, type, &mockSeg); + return; + } + + uint64_t totalItems = 0; + uint64_t numBuckets = 0; + uint64_t numSegments = 0; + + rax *rax = ebGetRaxPtr(eb); + raxIterator iter; + raxStart(&iter, rax); + raxSeek(&iter, "^", NULL, 0); + while (raxNext(&iter)) { + FirstSegHdr *seg = iter.data; + if (printItems) + ebBucketPrint(raxKey2BucketKey(iter.key), type, seg); + totalItems += seg->totalItems; + numBuckets++; + numSegments += seg->numSegs; + } + + printf("Total number of items : %" PRIu64 "\n", totalItems); + printf("Total number of buckets : %" PRIu64 "\n", numBuckets); + printf("Total number of segments : %" PRIu64 "\n", numSegments); + printf("Average items per bucket : %.2f\n", + (double) totalItems / numBuckets); + printf("Average items per segment : %.2f\n", + (double) totalItems / numSegments); + printf("Average segments per bucket : %.2f\n", + (double) numSegments / numBuckets); + + if (usedMem != -1) + { + printf("\nEbuckets memory usage (including FirstSegHdr/NexSegHdr):\n"); + printf("Total : %.2f KBytes\n", + (double) usedMem / 1024); + printf("Average per bucket : %" PRIu64 " Bytes\n", + usedMem / numBuckets); + printf("Average per item : %" PRIu64 " Bytes\n", + usedMem / totalItems); + printf("EB_BUCKET_KEY_PRECISION : %d\n", + 
EB_BUCKET_KEY_PRECISION); + printf("EB_SEG_MAX_ITEMS : %d\n", + EB_SEG_MAX_ITEMS); + } + raxStop(&iter); +} + +/*** API functions ***/ + +/** + * Deletes all items from given ebucket, invoking optional item deletion callbacks. + * + * @param eb - The ebucket to be deleted. + * @param type - Pointer to the EbucketsType structure defining the type of ebucket. + * @param ctx - A context pointer that can be used in optional item deletion callbacks. + */ +void ebDestroy(ebuckets *eb, EbucketsType *type, void *ctx) { + if (ebIsEmpty(*eb)) + return; + + if (ebIsList(*eb)) { + eItem head = ebGetListPtr(type, *eb); + eItem *pItemNext = &head; + while ( (*pItemNext) != NULL) { + eItem toDelete = *pItemNext; + ExpireMeta *metaToDelete = type->getExpireMeta(toDelete); + *pItemNext = metaToDelete->next; + metaToDelete->trash = 1; + if (type->onDeleteItem) type->onDeleteItem(toDelete, ctx); + } + } else { + struct deleteCbCtx deleteCtx = {type, ctx}; + raxFreeWithCbAndContext(ebGetRaxPtr(*eb), ebRaxDeleteCb, &deleteCtx); + } + + *eb = NULL; +} + +/** + * Removes the specified item from the given ebucket, updating the ebuckets handler + * accordingly. The function is optimized to remove items locally from segments + * without traversing rax tree or stepping long extended-segments. Therefore, + * it is assumed that the item is present in the bucket without verification. + * + * @param eb - Pointer to the ebuckets handler, which may get updated if the removal + * affects the structure. + * @param type - Pointer to the EbucketsType structure defining the type of ebucket. + * @param item - The eItem to be removed from the ebucket. + * + * @return 1 if the item was successfully removed; otherwise, return 0. 
+ */ +int ebRemove(ebuckets *eb, EbucketsType *type, eItem item) { + + if (ebIsEmpty(*eb)) + return 0; /* not removed */ + + int res; + if (ebIsList(*eb)) + res = ebRemoveFromList(eb, type, item); + else /* rax */ + res = ebRemoveFromRax(eb, type, item); + + /* if removed then mark as trash */ + if (res) + type->getExpireMeta(item)->trash = 1; + + EB_VALIDATE_STRUCTURE(*eb, type); + + return res; +} + +/** + * Adds the specified item to the ebucket structure based on expiration time. + * If the ebucket is a list or empty, it attempts to add the item to the list. + * Otherwise, it adds the item to rax. If the list reaches its maximum size, it + * is converted to rax. The ebuckets handler may be updated accordingly. + * + * @param eb - Pointer to the ebuckets handler, which may get updated + * @param type - Pointer to the EbucketsType structure defining the type of ebucket. + * @param item - The eItem to be added to the ebucket. + * @param expireTime - The expiration time of the item. + * + * @return 0 (C_OK) if the item was successfully added; + * Otherwise, return -1 (C_ERR) on failure. + */ +int ebAdd(ebuckets *eb, EbucketsType *type, eItem item, uint64_t expireTime) { + int res; + + assert(expireTime <= EB_EXPIRE_TIME_MAX); + + /* Set expire-time and reset segment flags */ + ExpireMeta *itemMeta = type->getExpireMeta(item); + ebSetMetaExpTime(itemMeta, expireTime); + itemMeta->lastInSegment = 0; + itemMeta->firstItemBucket = 0; + itemMeta->lastItemBucket = 0; + itemMeta->numItems = 0; + itemMeta->trash = 0; + + if (ebIsList(*eb) || (ebIsEmpty(*eb))) { + /* Try add item to list */ + if ( (res = ebAddToList(eb, type, item)) == 1) { + /* Failed to add since list reached maximum size. 
Convert to rax */ + *eb = ebConvertListToRax(ebGetListPtr(type, *eb), type); + res = ebAddToRax(eb, type, item, EB_BUCKET_KEY(expireTime)); + } + } else { + /* Add item to rax */ + res = ebAddToRax(eb, type, item, EB_BUCKET_KEY(expireTime)); + } + + EB_VALIDATE_STRUCTURE(*eb, type); + + return res; +} + +/** + * Performs expiration on the given ebucket, removing items that have expired. + * + * If all items in the data structure are expired, 'eb' will be set to NULL. + * + * @param eb - Pointer to the ebuckets handler, which may get updated + * @param type - Pointer to the EbucketsType structure defining the type of ebucket. + * @param info - Providing information about the expiration action. + */ +void ebExpire(ebuckets *eb, EbucketsType *type, ExpireInfo *info) { + /* updateList - maintain a list of expired items that the callback `onExpireItem` + * indicated to update their expiration time rather than removing them. + * At the end of this function, the items will be `ebAdd()` back. + * + * Note, this list of items does not allocate any memory, but temporary reuses + * the `next` pointer of the `ExpireMeta` structure of the expired items. */ + eItem updateList = NULL; + + /* reset info outputs */ + info->nextExpireTime = EB_EXPIRE_TIME_INVALID; + info->itemsExpired = 0; + + /* if empty ebuckets */ + if (ebIsEmpty(*eb)) return; + + if (ebIsList(*eb)) { + ebListExpire(eb, type, info, &updateList); + goto END_ACTEXP; + } + + /* handle rax expiry */ + + rax *rax = ebGetRaxPtr(*eb); + raxIterator iter; + + raxStart(&iter, rax); + + uint64_t nowKey = EB_BUCKET_KEY(info->now); + uint64_t itemsExpiredBefore = info->itemsExpired; + + while (1) { + raxSeek(&iter,"^",NULL,0); + if (!raxNext(&iter)) break; + + uint64_t bucketKey = raxKey2BucketKey(iter.key); + + FirstSegHdr *firstSegHdr = iter.data; + + /* We need to take into consideration EB_BUCKET_KEY_PRECISION. 
The value of
+         * "info->now" will be adjusted to lookup only for all buckets with assigned
+         * keys that are older than 1<<EB_BUCKET_KEY_PRECISION msec ago (See EB_BUCKET_KEY(info->now)). */
+        if (bucketKey >= nowKey) {
+            /* Take care to update next expire time based on next segment to expire */
+            info->nextExpireTime = ebGetMetaExpTime(
+                    type->getExpireMeta(firstSegHdr->head));
+            break;
+        }
+
+        /* If not managed to remove entire bucket then return */
+        if (ebSegExpire(firstSegHdr, type, info, &updateList) == 0)
+            break;
+
+        raxRemove(iter.rt, iter.key, EB_KEY_SIZE, NULL);
+    }
+
+    raxStop(&iter);
+    *ebRaxNumItems(rax) -= info->itemsExpired - itemsExpiredBefore;
+
+    if(raxEOF(&iter) && (updateList == 0)) {
+        raxFree(rax);
+        *eb = NULL;
+    }
+
+END_ACTEXP:
+    /* Add back items with updated expiration time */
+    while (updateList) {
+        ExpireMeta *mItem = type->getExpireMeta(updateList);
+        eItem next = mItem->next;
+        uint64_t expireAt = ebGetMetaExpTime(mItem);
+
+        /* Update next minimum expire time if needed.
+         * Condition is valid also if nextExpireTime is EB_EXPIRE_TIME_INVALID */
+        if (expireAt < info->nextExpireTime)
+            info->nextExpireTime = expireAt;
+
+        ebAdd(eb, type, updateList, expireAt);
+        updateList = next;
+    }
+
+    EB_VALIDATE_STRUCTURE(*eb, type);
+
+    return;
+}
+
+/* Performs active expiration dry-run to evaluate number of expired items
+ *
+ * It is faster than actual active-expire because it iterates only over the
+ * headers of the buckets until the first non-expired bucket, and no more than
+ * EB_SEG_MAX_ITEMS items in the last bucket
+ *
+ * @param eb - The ebucket to be checked.
+ * @param type - Pointer to the EbucketsType structure defining the type of ebucket.
+ * @param now - The current time in milliseconds.
+ */ +uint64_t ebExpireDryRun(ebuckets eb, EbucketsType *type, uint64_t now) { + if (ebIsEmpty(eb)) return 0; + + uint64_t numExpired = 0; + + /* If list, then iterate and count expired ones */ + if (ebIsList(eb)) { + ExpireMeta *mIter = type->getExpireMeta(ebGetListPtr(type, eb)); + while (1) { + if (ebGetMetaExpTime(mIter) >= now) + return numExpired; + + numExpired++; + + if (mIter->lastInSegment) + return numExpired; + + mIter = type->getExpireMeta(mIter->next); + } + } + + /* Handle rax active-expire */ + rax *rax = ebGetRaxPtr(eb); + raxIterator iter; + raxStart(&iter, rax); + uint64_t nowKey = EB_BUCKET_KEY(now); + raxSeek(&iter,"^",NULL,0); + assert(raxNext(&iter)); /* must be at least one bucket */ + FirstSegHdr *currBucket = iter.data; + + while (1) { + /* if 'currBucket' is last bucket, then break */ + if(!raxNext(&iter)) break; + FirstSegHdr *nextBucket = iter.data; + + /* if 'nextBucket' is not less than now then break */ + if (raxKey2BucketKey(iter.key) >= nowKey) break; + + /* nextBucket less than now. For sure all items in currBucket are expired */ + numExpired += currBucket->totalItems; + currBucket = nextBucket; + } + raxStop(&iter); + + /* If single segment bucket, iterate over items and count expired ones */ + if (currBucket->numSegs == 1) { + ExpireMeta *mIter = type->getExpireMeta(currBucket->head); + while (1) { + if (ebGetMetaExpTime(mIter) >= now) + return numExpired; + + numExpired++; + + if (mIter->lastInSegment) + return numExpired; + + mIter = type->getExpireMeta(mIter->next); + } + } + + /* Bucket key exactly reflect expiration time of all items (currBucket->numSegs > 1) */ + if (EB_BUCKET_KEY_PRECISION == 0) { + if (ebGetMetaExpTime(type->getExpireMeta(currBucket->head)) >= now) + return numExpired; + else + return numExpired + currBucket->totalItems; + } + + /* Iterate extended-segment and count expired ones */ + + /* Unreachable code, provided for completeness. 
Following operation is not
+     * bound in time and this is the main reason why we set above
+     * EB_BUCKET_KEY_PRECISION to 0 and have early return on previous condition */
+
+    ExpireMeta *mIter = type->getExpireMeta(currBucket->head);
+    while(1) {
+        if (ebGetMetaExpTime(mIter) < now)
+            numExpired++;
+
+        if (mIter->lastItemBucket)
+            return numExpired;
+
+        if (mIter->lastInSegment)
+            mIter = type->getExpireMeta(((NextSegHdr *) mIter->next)->head);
+        else
+            mIter = type->getExpireMeta(mIter->next);
+    }
+}
+
+/**
+ * Retrieves the expiration time of the item with the nearest expiration
+ *
+ * @param eb - The ebucket to be checked.
+ * @param type - Pointer to the EbucketsType structure defining the type of ebucket.
+ *
+ * @return The expiration time of the item with the nearest expiration time in
+ *         the ebucket. If empty, return EB_EXPIRE_TIME_INVALID. If ebuckets is
+ *         of type rax and minimal bucket is extended-segment, then it might not
+ *         return accurate result up-to 1<<EB_BUCKET_KEY_PRECISION msec.
+ */
+uint64_t ebGetNextTimeToExpire(ebuckets eb, EbucketsType *type) {
+    if (ebIsEmpty(eb))
+        return EB_EXPIRE_TIME_INVALID;
+
+    if (ebIsList(eb))
+        return ebGetMetaExpTime(type->getExpireMeta(ebGetListPtr(type, eb)));
+
+    /* rax */
+    uint64_t minExpire;
+    rax *rax = ebGetRaxPtr(eb);
+    raxIterator iter;
+    raxStart(&iter, rax);
+    raxSeek(&iter, "^", NULL, 0);
+    raxNext(&iter); /* seek to the first bucket */
+    FirstSegHdr *firstSegHdr = iter.data;
+    if ((firstSegHdr->numSegs == 1) || (EB_BUCKET_KEY_PRECISION == 0)) {
+        /* Single segment, or extended-segments that all have same expiration time.
+         * return the first item with the nearest expiration time */
+        minExpire = ebGetMetaExpTime(type->getExpireMeta(firstSegHdr->head));
+    } else {
+
+        /* If reached here, then it is because it is extended segment and buckets
+         * are with lower precision than 1msec. In that case it is better not to
+         * iterate extended-segments, which might be unbounded, and just return
+         * worst possible expiration time in this bucket.
+         *
+         * The reason we return blindly worst case expiration time value in this
+         * bucket is because the only usage of this function is to figure out
+         * when is the next time active expiration should be performed, and it
+         * is better to do it only after 1 or more items were expired and not the
+         * other way around.
+         */
+        uint64_t expTime = ebGetMetaExpTime(type->getExpireMeta(firstSegHdr->head));
+        minExpire = expTime | ( (1<<EB_BUCKET_KEY_PRECISION)-1 );
+    }
+    raxStop(&iter);
+    return minExpire;
+}
+
+/**
+ * Retrieves the maximum expiration time in the ebuckets
+ *
+ * @param eb - The ebucket to be checked.
+ * @param type - Pointer to the EbucketsType structure defining the type of ebucket.
+ * @param accurate - If 0, then result might be up-to 1<<EB_BUCKET_KEY_PRECISION
+ *        msec above the actual maximum (faster). Otherwise, iterate, if needed,
+ *        the last bucket to return accurate result.
+ *
+ * @return The maximum expiration time of the items in the ebucket. If empty,
+ *         return 0.
+ */
+uint64_t ebGetMaxExpireTime(ebuckets eb, EbucketsType *type, int accurate) {
+    if (ebIsEmpty(eb))
+        return 0;
+
+    if (ebIsList(eb)) {
+        eItem item = ebGetListPtr(type, eb);
+        ExpireMeta *em = type->getExpireMeta(item);
+        while (em->lastInSegment == 0)
+            em = type->getExpireMeta(em->next);
+        return ebGetMetaExpTime(em);
+    }
+
+    /* rax */
+    uint64_t maxExpire;
+    rax *rax = ebGetRaxPtr(eb);
+    raxIterator iter;
+    raxStart(&iter, rax);
+    raxSeek(&iter, "$", NULL, 0);
+    raxNext(&iter); /* seek to the last bucket */
+    FirstSegHdr *firstSegHdr = iter.data;
+    if (firstSegHdr->numSegs == 1) {
+        /* Single segment. return the last item with the highest expiration time */
+        ExpireMeta *em = type->getExpireMeta(firstSegHdr->head);
+        while (em->lastInSegment == 0)
+            em = type->getExpireMeta(em->next);
+        maxExpire = ebGetMetaExpTime(em);
+    } else if (EB_BUCKET_KEY_PRECISION == 0) {
+        /* Extended-segments that all have same expiration time */
+        maxExpire = ebGetMetaExpTime(type->getExpireMeta(firstSegHdr->head));
+    } else {
+        if (accurate == 0) {
+            /* return upper limit of the last bucket */
+            int mask = (1<<EB_BUCKET_KEY_PRECISION)-1;
+            uint64_t expTime = ebGetMetaExpTime(type->getExpireMeta(firstSegHdr->head));
+            maxExpire = (expTime + (mask+1)) & (~mask);
+        } else {
+            maxExpire = 0;
+            ExpireMeta *mIter = type->getExpireMeta(firstSegHdr->head);
+            while(1) {
+                while(1) {
+                    if (maxExpire < ebGetMetaExpTime(mIter))
+                        maxExpire = ebGetMetaExpTime(mIter);
+                    if (mIter->lastInSegment == 1) break;
+                    mIter = type->getExpireMeta(mIter->next);
+                }
+
+                if (mIter->lastItemBucket) break;
+                mIter = type->getExpireMeta(((NextSegHdr *) mIter->next)->head);
+            }
+        }
+    }
+    raxStop(&iter);
+    return maxExpire;
+}
+
+/**
+ * Retrieves the total number of items in the ebucket.
+ */ +uint64_t ebGetTotalItems(ebuckets eb, EbucketsType *type) { + if (ebIsEmpty(eb)) + return 0; + + if (ebIsList(eb)) + return type->getExpireMeta(ebGetListPtr(type, eb))->numItems; + else + return *ebRaxNumItems(ebGetRaxPtr(eb)); +} + +/* print expiration-time of items, ebuckets layout and some statistics */ +void ebPrint(ebuckets eb, EbucketsType *type) { + _ebPrint(eb, type, -1, 1); +} + +/* Validate the general structure of ebuckets. Calls assert(0) on error. */ +void ebValidate(ebuckets eb, EbucketsType *type) { + if (ebIsEmpty(eb)) + return; + + if (ebIsList(eb)) + ebValidateList(ebGetListPtr(type, eb), type); + else + ebValidateRax(ebGetRaxPtr(eb), type); +} + +/* Reallocates the memory used by the item using the provided allocation function. + * This feature was added for the active defrag feature. + * + * The 'defragfn' callbacks are called with a pointer to memory that callback + * can reallocate. The callbacks should return a new memory address or NULL, + * where NULL means that no reallocation happened and the old memory is still valid. + * + * Note: It is the caller's responsibility to ensure that the item has a valid expire time. */ +eItem ebDefragItem(ebuckets *eb, EbucketsType *type, eItem item, ebDefragFunction *defragfn) { + assert(!ebIsEmpty(*eb)); + if (ebIsList(*eb)) { + ExpireMeta *prevem = NULL; + eItem curitem = ebGetListPtr(type, *eb); + while (curitem != NULL) { + if (curitem == item) { + if ((curitem = defragfn(curitem))) { + if (prevem) + prevem->next = curitem; + else + *eb = ebMarkAsList(curitem); + } + return curitem; + } + + /* Move to the next item in the list. 
*/ + prevem = type->getExpireMeta(curitem); + curitem = prevem->next; + } + } else { + CommonSegHdr *currHdr; + ExpireMeta *mIter = type->getExpireMeta(item); + assert(mIter->trash != 1); + while (mIter->lastInSegment == 0) + mIter = type->getExpireMeta(mIter->next); + + if (mIter->lastItemBucket) + currHdr = (CommonSegHdr *) mIter->next; + else + currHdr = (CommonSegHdr *) ((NextSegHdr *) mIter->next)->prevSeg; + /* If the item is the first in the segment, then update the segment header */ + if (currHdr->head == item) { + if ((item = defragfn(item))) { + currHdr->head = item; + } + return item; + } + + /* Iterate over all items in the segment until the next is 'item' */ + ExpireMeta *mHead = type->getExpireMeta(currHdr->head); + mIter = mHead; + while (mIter->next != item) + mIter = type->getExpireMeta(mIter->next); + assert(mIter->next == item); + + if ((item = defragfn(item))) { + mIter->next = item; + } + return item; + } + redis_unreachable(); +} + +/* Retrieves the expiration time associated with the given item. If associated + * ExpireMeta is marked as trash, then return EB_EXPIRE_TIME_INVALID */ +uint64_t ebGetExpireTime(EbucketsType *type, eItem item) { + ExpireMeta *meta = type->getExpireMeta(item); + if (unlikely(meta->trash)) return EB_EXPIRE_TIME_INVALID; + return ebGetMetaExpTime(meta); +} + +/*** Unit tests ***/ + +#ifdef REDIS_TEST +#include +#include +#include +#include +#include "testhelp.h" + +#define TEST(name) printf("[TEST] >>> %s\n", name); +#define TEST_COND(name, cond) printf("[%s] >>> %s\n", (cond) ? 
"TEST" : "BYPS", name); if (cond) + +typedef struct MyItem { + int index; + ExpireMeta mexpire; +} MyItem; + +typedef struct TimeRange { + uint64_t start; + uint64_t end; +} TimeRange; + +ExpireMeta *getMyItemExpireMeta(const eItem item) { + return &((MyItem *)item)->mexpire; +} + +ExpireAction expireItemCb(void *item, eItem ctx); +void deleteItemCb(eItem item, void *ctx); +EbucketsType myEbucketsType = { + .getExpireMeta = getMyItemExpireMeta, + .onDeleteItem = deleteItemCb, + .itemsAddrAreOdd = 0, +}; + +EbucketsType myEbucketsType2 = { + .getExpireMeta = getMyItemExpireMeta, + .onDeleteItem = NULL, + .itemsAddrAreOdd = 0, +}; + +/* XOR over all items time-expiration. Must be 0 after all addition/removal */ +uint64_t expItemsHashValue = 0; + +ExpireAction expireItemCb(eItem item, void *ctx) { + ExpireMeta *meta = myEbucketsType.getExpireMeta(item); + uint64_t expTime = ebGetMetaExpTime(meta); + expItemsHashValue = expItemsHashValue ^ expTime; + + TimeRange *range = (TimeRange *) ctx; + /* Verify expiration time is within the range */ + if (range != NULL) assert(expTime >= range->start && expTime <= range->end); + +/* If benchmarking then avoid from heavyweight free operation. 
It is user side logic */ +#ifndef EB_TEST_BENCHMARK + zfree(item); +#endif + return ACT_REMOVE_EXP_ITEM; +} + +ExpireAction expireUpdateThirdItemCb(eItem item, void *ctx) { + uint64_t expTime = (uint64_t) (uintptr_t) ctx; + static int calls = 0; + if ((calls++) == 3) { + ebSetMetaExpTime(&(((MyItem *)item)->mexpire), expTime ); + return ACT_UPDATE_EXP_ITEM; + } + + return ACT_REMOVE_EXP_ITEM; +} + +void deleteItemCb(eItem item, void *ctx) { + UNUSED(ctx); + zfree(item); +} + +void addItems(ebuckets *eb, uint64_t startExpire, int step, uint64_t numItems, MyItem **ar) { + for (uint64_t i = 0 ; i < numItems ; i++) { + uint64_t expireTime = startExpire + (i * step); + expItemsHashValue = expItemsHashValue ^ expireTime; + MyItem *item = zmalloc(sizeof(MyItem)); + if (ar) ar[i] = item; + ebAdd(eb, &myEbucketsType, item, expireTime); + } +} + +/* expireRanges - is given as bucket-key to be agnostic to the different configuration + * of EB_BUCKET_KEY_PRECISION */ +void distributeTest(int lowestTime, + uint64_t *expireRanges, + const int *ItemsPerRange, + int numRanges, + int isExpire, + int printStat) { + struct timeval timeBefore, timeAfter, timeDryRun, timeCreation, timeDestroy; + ebuckets eb = ebCreate(); + + /* create items with random expiry */ + uint64_t startRange = lowestTime; + + expItemsHashValue = 0; + void *listOfItems = NULL; + for (int i = 0; i < numRanges; i++) { + uint64_t endRange = EB_BUCKET_EXP_TIME(expireRanges[i]); + for (int j = 0; j < ItemsPerRange[i]; j++) { + uint64_t randomExpirey = (rand() % (endRange - startRange)) + startRange; + expItemsHashValue = expItemsHashValue ^ (uint32_t) randomExpirey; + MyItem *item = zmalloc(sizeof(MyItem)); + getMyItemExpireMeta(item)->next = listOfItems; + listOfItems = item; + ebSetMetaExpTime(getMyItemExpireMeta(item), randomExpirey); + } + startRange = EB_BUCKET_EXP_TIME(expireRanges[i]); /* next start range */ + } + + /* Take to sample memory after all items allocated and before insertion to ebuckets */ + 
size_t usedMemBefore = zmalloc_used_memory(); + + gettimeofday(&timeBefore, NULL); + while (listOfItems) { + MyItem *item = (MyItem *)listOfItems; + listOfItems = getMyItemExpireMeta(item)->next; + uint64_t expireTime = ebGetMetaExpTime(&item->mexpire); + ebAdd(&eb, &myEbucketsType, item, expireTime); + } + gettimeofday(&timeAfter, NULL); + timersub(&timeAfter, &timeBefore, &timeCreation); + + gettimeofday(&timeBefore, NULL); + ebExpireDryRun(eb, &myEbucketsType, 0xFFFFFFFFFFFF); /* expire dry-run all */ + gettimeofday(&timeAfter, NULL); + timersub(&timeAfter, &timeBefore, &timeDryRun); + + if (printStat) { + _ebPrint(eb, &myEbucketsType, zmalloc_used_memory() - usedMemBefore, 0); + } + + gettimeofday(&timeBefore, NULL); + if (isExpire) { + startRange = lowestTime; + /* Active expire according to the ranges */ + for (int i = 0 ; i < numRanges ; i++) { + + /* When checking how many items are expired, we need to take into + * consideration EB_BUCKET_KEY_PRECISION. The value of "info->now" + * will be adjusted by ebActiveExpire() to lookup only for all buckets + * with assigned keys that are older than 1<now) and not "<=". + * But if there is a list behind ebuckets, then this limitation is not + * applied and the operator "<=" will be used instead. + * + * The '-1' in case of list brings makes both cases aligned to have + * same result */ + uint64_t now = EB_BUCKET_EXP_TIME(expireRanges[i]) + (ebIsList(eb) ? 
-1 : 0); + + TimeRange range = {EB_BUCKET_EXP_TIME(startRange), EB_BUCKET_EXP_TIME(expireRanges[i]) }; + ExpireInfo info = { + .maxToExpire = 0xFFFFFFFF, + .onExpireItem = expireItemCb, + .ctx = &range, + .now = now, + .itemsExpired = 0}; + + ebExpire(&eb, &myEbucketsType, &info); + + assert( (eb==NULL && (i + 1 == numRanges)) || (eb!=NULL && (i + 1 < numRanges)) ); + assert( info.itemsExpired == (uint64_t) ItemsPerRange[i]); + startRange = expireRanges[i]; + } + assert(eb == NULL); + assert( (expItemsHashValue & 0xFFFFFFFF) == 0); + } + ebDestroy(&eb, &myEbucketsType, NULL); + gettimeofday(&timeAfter, NULL); + timersub(&timeAfter, &timeBefore, &timeDestroy); + + if (printStat) { + printf("Time elapsed ebuckets creation : %ld.%06ld\n", (long int)timeCreation.tv_sec, (long int)timeCreation.tv_usec); + printf("Time elapsed active-expire dry-run : %ld.%06ld\n", (long int)timeDryRun.tv_sec, (long int)timeDryRun.tv_usec); + if (isExpire) + printf("Time elapsed active-expire : %ld.%06ld\n", (long int)timeDestroy.tv_sec, (long int)timeDestroy.tv_usec); + else + printf("Time elapsed destroy : %ld.%06ld\n", (long int)timeDestroy.tv_sec, (long int)timeDestroy.tv_usec); + } + +} + +#define UNUSED(x) (void)(x) +#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0])) + +eItem defragCallback(const eItem item) { + size_t size = zmalloc_usable_size(item); + eItem newitem = zmalloc(size); + memcpy(newitem, item, size); + zfree(item); + return newitem; +} + +int ebucketsTest(int argc, char **argv, int flags) { + UNUSED(argc); + UNUSED(argv); + srand(0); + + int verbose = (flags & REDIS_TEST_VERBOSE) ? 
2 : 1; + UNUSED(verbose); + +#ifdef EB_TEST_BENCHMARK + TEST("ebuckets - benchmark 10 million items: alloc + add + activeExpire") { + + struct TestParams { + uint64_t minExpire; + uint64_t maxExpire; + int items; + const char *description; + } testCases[] = { + { 1805092100000, 1805092100000 + (uint64_t) 1, 10000000, "1 msec distribution" }, + { 1805092100000, 1805092100000 + (uint64_t) 1000, 10000000, "1 sec distribution" }, + { 1805092100000, 1805092100000 + (uint64_t) 1000*60, 10000000, "1 min distribution" }, + { 1805092100000, 1805092100000 + (uint64_t) 1000*60*60, 10000000, "1 hour distribution" }, + { 1805092100000, 1805092100000 + (uint64_t) 1000*60*60*24, 10000000, "1 day distribution" }, + { 1805092100000, 1805092100000 + (uint64_t) 1000*60*60*24*7, 10000000, "1 week distribution" }, + { 1805092100000, 1805092100000 + (uint64_t) 1000*60*60*24*30, 10000000, "1 month distribution" } + }; + + /* selected test */ + uint32_t tid = EB_TEST_BENCHMARK; + + printf("\n------ TEST EBUCKETS: %s ------\n", testCases[tid].description); + uint64_t expireRanges[] = { testCases[tid].minExpire, testCases[tid].maxExpire }; + int itemsPerRange[] = { 0, testCases[tid].items }; + + /* expireRanges[] is provided to distributeTest() as bucket-key values */ + for (uint32_t j = 0; j < ARRAY_SIZE(expireRanges); ++j) { + expireRanges[j] = expireRanges[j] >> EB_BUCKET_KEY_PRECISION; + } + + distributeTest(0, expireRanges, itemsPerRange, ARRAY_SIZE(expireRanges), 1, 1); + return 0; + } +#endif + + TEST("list - Create a single item, get TTL, and remove") { + MyItem *singleItem = zmalloc(sizeof(MyItem)); + ebuckets eb = NULL; + ebAdd(&eb, &myEbucketsType, singleItem, 1000); + assert(ebGetExpireTime(&myEbucketsType, singleItem) == 1000 ); + + /* remove the item */ + assert(ebRemove(&eb, &myEbucketsType, singleItem)); + /* now the ebuckets is empty */ + assert(ebRemove(&eb, &myEbucketsType, singleItem) == 0); + + zfree(singleItem); + + ebDestroy(&eb, &myEbucketsType, NULL); + } + + 
TEST("list - Create few items on different times, get TTL, and then remove") { + MyItem *items[EB_LIST_MAX_ITEMS]; + ebuckets eb = NULL; + for (int i = 0 ; i < EB_LIST_MAX_ITEMS ; i++) { + items[i] = zmalloc(sizeof(MyItem)); + ebAdd(&eb, &myEbucketsType, items[i], i); + } + + for (uint64_t i = 0 ; i < EB_LIST_MAX_ITEMS ; i++) { + assert(ebGetExpireTime(&myEbucketsType, items[i]) == i ); + assert(ebRemove(&eb, &myEbucketsType, items[i])); + } + + for (int i = 0 ; i < EB_LIST_MAX_ITEMS ; i++) { + zfree(items[i]); + } + + ebDestroy(&eb, &myEbucketsType, NULL); + } + + TEST("list - Create few items on different times, get TTL, and then delete") { + MyItem *items[EB_LIST_MAX_ITEMS]; + ebuckets eb = NULL; + for (int i = 0 ; i < EB_LIST_MAX_ITEMS ; i++) { + items[i] = zmalloc(sizeof(MyItem)); + ebAdd(&eb, &myEbucketsType, items[i], i); + } + + for (uint64_t i = 0 ; i < EB_LIST_MAX_ITEMS ; i++) { + assert(ebGetExpireTime(&myEbucketsType, items[i]) == i ); + } + + ebDestroy(&eb, &myEbucketsType, NULL); + } + + TEST_COND("ebuckets - Add items with increased/decreased expiration time and then expire", + EB_BUCKET_KEY_PRECISION > 0) + { + ebuckets eb = NULL; + + for (int isDecr = 0; isDecr < 2; ++isDecr) { + for (uint32_t numItems = 1; numItems < 64; ++numItems) { + uint64_t step = 1 << EB_BUCKET_KEY_PRECISION; + + if (isDecr == 0) + addItems(&eb, 0, step, numItems, NULL); + else + addItems(&eb, (numItems - 1) * step, -step, numItems, NULL); + + for (uint32_t i = 1; i <= numItems; i++) { + TimeRange range = {EB_BUCKET_EXP_TIME(i - 1), EB_BUCKET_EXP_TIME(i)}; + ExpireInfo info = { + .maxToExpire = 1, + .onExpireItem = expireItemCb, + .ctx = &range, + .now = EB_BUCKET_EXP_TIME(i), + .itemsExpired = 0}; + + ebExpire(&eb, &myEbucketsType, &info); + assert(info.itemsExpired == 1); + if (i == numItems) { /* if last item */ + assert(eb == NULL); + assert(info.nextExpireTime == EB_EXPIRE_TIME_INVALID); + } else { + assert(info.nextExpireTime == EB_BUCKET_EXP_TIME(i)); + } + } + } + } 
+ } + + TEST_COND("ebuckets - Create items with same expiration time and then expire", + EB_BUCKET_KEY_PRECISION > 0) + { + ebuckets eb = NULL; + uint64_t expirePerIter = 2; + for (uint32_t numIterations = 1; numIterations < 100; ++numIterations) { + uint32_t numItems = numIterations * expirePerIter; + uint64_t expireTime = (1 << EB_BUCKET_KEY_PRECISION) + 1; + addItems(&eb, expireTime, 0, numItems, NULL); + + for (uint32_t i = 1; i <= numIterations; i++) { + ExpireInfo info = { + .maxToExpire = expirePerIter, + .onExpireItem = expireItemCb, + .ctx = NULL, + .now = (2 << EB_BUCKET_KEY_PRECISION), + .itemsExpired = 0}; + ebExpire(&eb, &myEbucketsType, &info); + assert(info.itemsExpired == expirePerIter); + if (i == numIterations) { /* if last item */ + assert(eb == NULL); + assert(info.nextExpireTime == EB_EXPIRE_TIME_INVALID); + } else { + assert(info.nextExpireTime == expireTime); + } + } + } + } + + TEST("list - Create few items on random times and then expire/delete ") { + for (int isExpire = 0 ; isExpire <= 1 ; ++isExpire ) { + uint64_t expireRanges[] = {1000}; /* bucket-keys */ + int itemsPerRange[] = {EB_LIST_MAX_ITEMS}; + distributeTest(0, expireRanges, itemsPerRange, + ARRAY_SIZE(expireRanges), isExpire, 0); + } + } + + TEST("list - Create few items (list) on same time and then active expire/delete ") { + for (int isExpire = 0 ; isExpire <= 1 ; ++isExpire ) { + uint64_t expireRanges[] = {1, 2}; /* bucket-keys */ + int itemsPerRange[] = {0, EB_LIST_MAX_ITEMS}; + + distributeTest(0, expireRanges, itemsPerRange, + ARRAY_SIZE(expireRanges), isExpire, 0); + } + } + + TEST("ebuckets - Create many items on same time and then active expire/delete ") { + for (int isExpire = 1 ; isExpire <= 1 ; ++isExpire ) { + uint64_t expireRanges[] = {1, 2}; /* bucket-keys */ + int itemsPerRange[] = {0, 20}; + + distributeTest(0, expireRanges, itemsPerRange, + ARRAY_SIZE(expireRanges), isExpire, 0); + } + } + + TEST("ebuckets - Create items on different times and then 
expire/delete ") { + for (int isExpire = 0 ; isExpire <= 0 ; ++isExpire ) { + for (int numItems = 1 ; numItems < 100 ; ++numItems ) { + uint64_t expireRanges[] = {1000000}; /* bucket-keys */ + int itemsPerRange[] = {numItems}; + distributeTest(0, expireRanges, itemsPerRange, + ARRAY_SIZE(expireRanges), 1, 0); + } + } + } + + TEST("ebuckets - Create items on different times and then ebRemove() ") { + ebuckets eb = NULL; + + for (int step = -1 ; step <= 1 ; ++step) { + for (int numItems = 1; numItems <= EB_SEG_MAX_ITEMS*3; ++numItems) { + for (int offset = 0; offset < numItems; offset++) { + MyItem *items[numItems]; + uint64_t startValue = 1000 << EB_BUCKET_KEY_PRECISION; + int stepValue = step * (1 << EB_BUCKET_KEY_PRECISION); + addItems(&eb, startValue, stepValue, numItems, items); + for (int i = 0; i < numItems; i++) { + int at = (i + offset) % numItems; + assert(ebRemove(&eb, &myEbucketsType, items[at])); + zfree(items[at]); + } + assert(eb == NULL); + } + } + } + } + + TEST("ebuckets - test min/max expire time") { + ebuckets eb = NULL; + MyItem items[3*EB_SEG_MAX_ITEMS]; + for (int numItems = 1 ; numItems < (int)ARRAY_SIZE(items) ; numItems++) { + uint64_t minExpTime = RAND_MAX, maxExpTime = 0; + for (int i = 0; i < numItems; i++) { + /* generate random expiration time */ + uint64_t expireTime = rand(); + if (expireTime < minExpTime) minExpTime = expireTime; + if (expireTime > maxExpTime) maxExpTime = expireTime; + ebAdd(&eb, &myEbucketsType2, items + i, expireTime); + assert(ebGetNextTimeToExpire(eb, &myEbucketsType2) == minExpTime); + assert(ebGetMaxExpireTime(eb, &myEbucketsType2, 0) == maxExpTime); + } + ebDestroy(&eb, &myEbucketsType2, NULL); + } + } + + TEST_COND("ebuckets - test min/max expire time, with extended-segment", + (1< 2*EB_SEG_MAX_ITEMS) { + ebuckets eb = NULL; + MyItem items[(2*EB_SEG_MAX_ITEMS)-1]; + for (int numItems = EB_SEG_MAX_ITEMS+1 ; numItems < (int)ARRAY_SIZE(items) ; numItems++) { + /* First reach extended-segment (two chained 
segments in a bucket) */ + for (int i = 0; i <= EB_SEG_MAX_ITEMS; i++) { + uint64_t itemExpireTime = (1<index = i; + ebAdd(&eb, &myEbucketsType, items[i], i); + } + assert((s <= EB_LIST_MAX_ITEMS) ? ebIsList(eb) : !ebIsList(eb)); + /* Defrag all the items. */ + for (int i = 0; i < s; i++) { + MyItem *newitem = ebDefragItem(&eb, &myEbucketsType, items[i], defragCallback); + if (newitem) items[i] = newitem; + } + /* Verify that the data is not corrupted. */ + ebValidate(eb, &myEbucketsType); + for (int i = 0; i < s; i++) + assert(items[i]->index == i); + ebDestroy(&eb, &myEbucketsType, NULL); + } + } + +// TEST("segment - Add smaller item to full segment that all share same ebucket-key") +// TEST("segment - Add item to full segment and make it extended-segment (all share same ebucket-key)") +// TEST("ebuckets - Create rax tree with extended-segment and add item before") + + return 0; +} + +#endif diff --git a/src/ebuckets.h b/src/ebuckets.h new file mode 100644 index 00000000000..fbcae8fd153 --- /dev/null +++ b/src/ebuckets.h @@ -0,0 +1,306 @@ +/* + * Copyright Redis Ltd. 2024 - present + * + * Licensed under your choice of the Redis Source Available License 2.0 (RSALv2) + * or the Server Side Public License v1 (SSPLv1). + * + * + * WHAT IS EBUCKETS? + * ----------------- + * ebuckets is being used to store items that are set with expiration-time. It + * supports the basic API of add, remove and active expiration. The implementation + * of it is based on rax-tree, or plain linked-list when small. The expiration time + * of the items are used as the key to traverse rax-tree. + * + * Instead of holding a distinct item in each leaf of the rax-tree we can aggregate + * items into small segments and hold it in each leaf. This way we can avoid + * frequent modification of the rax-tree, since many of the modifications + * will be done only at the segment level. 
It will also save memory because + * rax-tree can be costly, around 40 bytes per leaf (with rax-key limited to 6 + * bytes). Whereas each additional item in the segment will cost the size of the + * 'next' pointer in a list (8 bytes) and few more bytes for maintenance of the + * segment. + * + * EBUCKETS STRUCTURE + * ------------------ + * The ebuckets data structure is organized in a hierarchical manner as follows: + * + * 1. ebuckets: This is the top-level data structure. It can be either a rax tree + * or a plain linked list. It contains one or more buckets, each representing + * an interval in time. + * + * 2. bucket: Each bucket represents an interval in time and contains one or more + * segments. The key in the rax-tree for each bucket represents low + * bound expiration-time for the items within this bucket. The key of the + * following bucket represents the upper bound expiration-time. + * + * 3. segment: Each segment within a bucket can hold up to `EB_SEG_MAX_ITEMS` + * items as a linked list. If there are more, the segment will try to + * split the bucket. To avoid wasting memory, it is a singly linked list (only + * next-item pointer). It is a cyclic linked-list to allow efficient removal of + * items from the middle of the segment without traversing the rax tree. + * + * 4. item: Each item that is stored in ebuckets should embed the ExpireMeta + * struct and supply getter function (see EbucketsType.getExpireMeta). This + * struct holds the expire-time of the item and few more fields that are used + * to maintain the segments data-structure. + * + * SPLITTING BUCKET + * ---------------- + * Each segment can hold up-to `EB_SEG_MAX_ITEMS` items. On insertion of new + * item, it will try to split the segment. 
Here is an example For adding item + * with expiration of 42 to a segment that already reached its maximum capacity + * which will cause to split of the segment and in turn split of the bucket as + * well to a finer grained ranges: + * + * BUCKETS BUCKETS + * [ 00-10 ] -> size(Seg0) = 11 ==> [ 00-10 ] -> size(Seg0) = 11 + * [ 11-76 ] -> size(Seg1) = 16 [ 11-36 ] -> size(Seg1) = 9 + * [ 37-76 ] -> size(Seg2) = 7 + * + * EXTENDING BUCKET + * ---------------- + * In the example above, the reason it wasn't split evenly is that Seg1 must have + * been holding items with same TTL and they must reside together in the same + * bucket after the split. Which brings us to another important point. If there + * is a segment that reached its maximum capacity and all the items have same + * expiration-time key, then we cannot split the bucket but aggregate all the + * items, with same expiration time key, by allocating an extended-segment and + * chain it to the first segment in visited bucket. In that sense, extended + * segments will only hold items with same expiration-time key. + * + * BUCKETS BUCKETS + * [ 00-10 ] -> size(Seg0)=11 ==> [ 00-10 ] -> size(Seg0)=11 + * [ 11-12 ] -> size(Seg1)=16 [ 11-12 ] -> size(Seg1)=1 -> size(Seg2)=16 + * + * LIMITING RAX TREE DEPTH + * ----------------------- + * The rax tree is basically a B-tree and its depth is bounded by the sizeof of + * the key. Holding 6 bytes for expiration-time key is more than enough to represent + * unix-time in msec, and in turn the depth of the tree is limited to 6 levels. + * At a first glance it might look sufficient but we need take into consideration + * the heavyweight maintenance and traversal of each node in the B-tree. + * + * And so, we can further prune the tree such that holding keys with msec precision + * in the tree doesn't bring with it much value. 
The active-expiration operation can + * live with deletion of expired items, say, older than 1 sec, which means the size + * of time-expiration keys to the rax tree become no more than ~4.5 bytes and we + * also get rid of the "noisy" bits which most probably will cause to yet another + * branching and modification of the rax tree in case of items with time-expiration + * difference of less than 1 second. The lazy expiration will still be precise and + * without compromise on accuracy because the exact expiration-time is kept + * attached as well to each item, in `ExpireMeta`, and each traversal of item with + * expiration will behave as expected down to the msec. Take care to configure + * `EB_BUCKET_KEY_PRECISION` according to your needs. + * + * EBUCKET KEY + * ----------- + * Taking into account configured value of `EB_BUCKET_KEY_PRECISION`, two items + * with expiration-time t1 and t2 will be considered to have the same key in the + * rax-tree/buckets if and only if: + * + * EB_BUCKET_KEY(t1) == EB_BUCKET_KEY(t2) + * + * EBUCKETS CREATION + * ----------------- + * To avoid the cost of allocating rax data-structure for only few elements, + * ebuckets will start as a simple linked-list and only when it reaches some + * threshold, it will be converted to rax. + * + * TODO + * ---- + * - ebRemove() optimize to merge small segments into one segment. + * - ebAdd() Fix pathological case of cascade addition of items into rax such + * that their values are smaller/bigger than visited extended-segment which ends + * up with multiple segments with a single item in each segment. + */ + +#ifndef __EBUCKETS_H +#define __EBUCKETS_H + +#include +#include +#include +#include +#include "rax.h" + +/* + * EB_BUCKET_KEY_PRECISION - Defines the number of bits to ignore from the + * expiration-time when mapping to buckets. The higher the value, the more items + * with similar expiration-time will be aggregated into the same bucket. 
The lower + * the value, the more "accurate" the active expiration of buckets will be. + * + * Note that the accurate time expiration of each item is preserved anyway and + * enforced by lazy expiration. It only impacts the active expiration that will + * be able to work on buckets older than (1<> EB_BUCKET_KEY_PRECISION) + + +#define EB_EXPIRE_TIME_MAX ((uint64_t)0x0000FFFFFFFFFFFF) /* Maximum expire-time. */ +#define EB_EXPIRE_TIME_INVALID (EB_EXPIRE_TIME_MAX+1) /* assumed bigger than max */ + +/* Handler to ebuckets DS. Pointer to a list, rax or NULL (empty DS). See also ebIsList(). */ +typedef void *ebuckets; + +/* Users of ebuckets will store `eItem` which is just a void pointer to their + * element. In addition, eItem should embed the ExpireMeta struct and supply + * getter function (see EbucketsType.getExpireMeta). + */ +typedef void *eItem; + +/* This struct Should be embedded inside `eItem` and must be aligned in memory. */ +typedef struct ExpireMeta { + /* 48bits of unix-time in msec. This value is sufficient to represent, in + * unix-time, until the date of 02 August, 10889 + */ + uint32_t expireTimeLo; /* Low bits of expireTime. */ + uint16_t expireTimeHi; /* High bits of expireTime. */ + + unsigned int lastInSegment : 1; /* Last item in segment. If set, then 'next' will + point to the NextSegHdr, unless lastItemBucket=1 + then it will point to segment header of the + current segment. */ + unsigned int firstItemBucket : 1; /* First item in bucket. This flag assist + to manipulate segments directly without + the need to traverse from start the + rax tree */ + unsigned int lastItemBucket : 1; /* Last item in bucket. This flag assist + to manipulate segments directly without + the need to traverse from start the + rax tree */ + unsigned int numItems : 5; /* Only first item in segment will maintain + this value. */ + + unsigned int trash : 1; /* This flag indicates whether the ExpireMeta + associated with the item is leftover. 
+ There is always a potential to reuse the + item after removal/deletion. Note that, + the user can still safely O(1) TTL lookup + a given item and verify whether attached + TTL is valid or leftover. See function + ebGetExpireTime(). */ + + unsigned int userData : 3; /* ebuckets can be used to store in same + instance few different types of items, + such as, listpack and hash. This field + is reserved to store such identification + associated with the item and can help + to distinct on delete or expire callback. + It is not used by ebuckets internally and + should be maintained by the user */ + + unsigned int reserved : 4; + + void *next; /* - If not last item in segment then next + points to next eItem (lastInSegment=0). + - If last in segment but not last in + bucket (lastItemBucket=0) then it + points to next segment header. + - If last in bucket then it points to + current segment header (Can be either + of type FirstSegHdr or NextSegHdr). */ +} ExpireMeta; + +/* Each instance of ebuckets need to have corresponding EbucketsType that holds + * the necessary callbacks and configuration to operate correctly on the type + * of items that are stored in it. Conceptually it should have hold reference + * from ebuckets instance to this type, but to save memory we will pass it as + * an argument to each API call. */ +typedef struct EbucketsType { + /* getter to extract the ExpireMeta from the item */ + ExpireMeta* (*getExpireMeta)(const eItem item); + + /* Called during ebDestroy(). Set to NULL if not needed. */ + void (*onDeleteItem)(eItem item, void *ctx); + + /* Is addresses of items are odd in memory. It is taken into consideration + * and used by ebuckets to know how to distinct between ebuckets pointer to + * rax versus a pointer to item which is head of list. */ + unsigned int itemsAddrAreOdd; +} EbucketsType; + +/* Returned value by `onExpireItem` callback to indicate the action to be taken by + * ebExpire(). 
*/ +typedef enum ExpireAction { + ACT_REMOVE_EXP_ITEM=0, /* Remove the item from ebuckets. */ + ACT_UPDATE_EXP_ITEM, /* Re-insert the item with updated expiration-time. + Before returning this value, the cb need to + update expiration time of the item by assisting + function ebSetMetaExpTime(). The item will be + kept aside and will be added again to ebuckets + at the end of ebExpire() */ + ACT_STOP_ACTIVE_EXP /* Stop active-expiration. It will assume that + provided 'item' wasn't deleted by the callback. */ +} ExpireAction; + +/* ExpireInfo is used to pass input and output parameters to ebExpire(). */ +typedef struct ExpireInfo { + /* onExpireItem - Called during active-expiration by ebExpire() */ + ExpireAction (*onExpireItem)(eItem item, void *ctx); + + uint64_t maxToExpire; /* [INPUT ] Limit of number expired items to scan */ + void *ctx; /* [INPUT ] context to pass to onExpireItem */ + uint64_t now; /* [INPUT ] Current time in msec. */ + uint64_t itemsExpired; /* [OUTPUT] Returns the number of expired or updated items. */ + uint64_t nextExpireTime; /* [OUTPUT] Next expiration time. Returns + EB_EXPIRE_TIME_INVALID if none left. 
*/ +} ExpireInfo; + +/* ebuckets API */ + +static inline ebuckets ebCreate(void) { return NULL; } /* Empty ebuckets */ + +void ebDestroy(ebuckets *eb, EbucketsType *type, void *deletedItemsCbCtx); + +void ebExpire(ebuckets *eb, EbucketsType *type, ExpireInfo *info); + +uint64_t ebExpireDryRun(ebuckets eb, EbucketsType *type, uint64_t now); + +static inline int ebIsEmpty(ebuckets eb) { return eb == NULL; } + +uint64_t ebGetNextTimeToExpire(ebuckets eb, EbucketsType *type); + +uint64_t ebGetMaxExpireTime(ebuckets eb, EbucketsType *type, int accurate); + +uint64_t ebGetTotalItems(ebuckets eb, EbucketsType *type); + +/* Item related API */ + +int ebRemove(ebuckets *eb, EbucketsType *type, eItem item); + +int ebAdd(ebuckets *eb, EbucketsType *type, eItem item, uint64_t expireTime); + +uint64_t ebGetExpireTime(EbucketsType *type, eItem item); + +typedef eItem (ebDefragFunction)(const eItem item); +eItem ebDefragItem(ebuckets *eb, EbucketsType *type, eItem item, ebDefragFunction *fn); + +static inline uint64_t ebGetMetaExpTime(ExpireMeta *expMeta) { + return (((uint64_t)(expMeta)->expireTimeHi << 32) | (expMeta)->expireTimeLo); +} + +static inline void ebSetMetaExpTime(ExpireMeta *expMeta, uint64_t t) { + expMeta->expireTimeLo = (uint32_t)(t&0xFFFFFFFF); + expMeta->expireTimeHi = (uint16_t)((t) >> 32); +} + +/* Debug API */ + +void ebValidate(ebuckets eb, EbucketsType *type); + +void ebPrint(ebuckets eb, EbucketsType *type); + +#ifdef REDIS_TEST +int ebucketsTest(int argc, char *argv[], int flags); +#endif + +#endif /* __EBUCKETS_H */ diff --git a/src/endianconv.c b/src/endianconv.c index 8eb6b22288d..36673e0d015 100644 --- a/src/endianconv.c +++ b/src/endianconv.c @@ -13,32 +13,11 @@ * * ---------------------------------------------------------------------------- * - * Copyright (c) 2011-2012, Salvatore Sanfilippo + * Copyright (c) 2011-Present, Redis Ltd. * All rights reserved. 
* - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Redis nor the names of its contributors may be used - * to endorse or promote products derived from this software without - * specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. + * Licensed under your choice of the Redis Source Available License 2.0 + * (RSALv2) or the Server Side Public License v1 (SSPLv1). */ diff --git a/src/endianconv.h b/src/endianconv.h index bfe9b7d0acd..469913d1165 100644 --- a/src/endianconv.h +++ b/src/endianconv.h @@ -2,32 +2,11 @@ * * ---------------------------------------------------------------------------- * - * Copyright (c) 2011-2012, Salvatore Sanfilippo + * Copyright (c) 2011-Present, Redis Ltd. * All rights reserved. 
* - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Redis nor the names of its contributors may be used - * to endorse or promote products derived from this software without - * specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. + * Licensed under your choice of the Redis Source Available License 2.0 + * (RSALv2) or the Server Side Public License v1 (SSPLv1). */ #ifndef __ENDIANCONV_H diff --git a/src/eval.c b/src/eval.c index eb4b529368d..1cea9e6db02 100644 --- a/src/eval.c +++ b/src/eval.c @@ -1,30 +1,9 @@ /* - * Copyright (c) 2009-2012, Salvatore Sanfilippo + * Copyright (c) 2011-Present, Redis Ltd. * All rights reserved. 
* - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Redis nor the names of its contributors may be used - * to endorse or promote products derived from this software without - * specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. + * Licensed under your choice of the Redis Source Available License 2.0 + * (RSALv2) or the Server Side Public License v1 (SSPLv1). 
*/ #include "server.h" @@ -38,6 +17,9 @@ #include #include #include +#if defined(USE_JEMALLOC) +#include +#endif #include #include @@ -46,6 +28,7 @@ void ldbDisable(client *c); void ldbEnable(client *c); void evalGenericCommandWithDebugging(client *c, int evalsha); sds ldbCatStackValue(sds s, lua_State *lua, int idx); +listNode *luaScriptsLRUAdd(client *c, sds sha, int evalsha); static void dictLuaScriptDestructor(dict *d, void *val) { UNUSED(d); @@ -58,7 +41,7 @@ static uint64_t dictStrCaseHash(const void *key) { return dictGenCaseHashFunction((unsigned char*)key, strlen((char*)key)); } -/* server.lua_scripts sha (as sds string) -> scripts (as luaScript) cache. */ +/* lctx.lua_scripts sha (as sds string) -> scripts (as luaScript) cache. */ dictType shaScriptObjectDictType = { dictStrCaseHash, /* hash function */ NULL, /* key dup */ @@ -74,6 +57,7 @@ struct luaCtx { lua_State *lua; /* The Lua interpreter. We use just one for all clients */ client *lua_client; /* The "fake client" to query Redis from Lua */ dict *lua_scripts; /* A dictionary of SHA1 -> Lua scripts */ + list *lua_scripts_lru_list; /* A list of SHA1, first in first out LRU eviction. */ unsigned long long lua_scripts_mem; /* Cached scripts' memory + oh */ } lctx; @@ -181,18 +165,23 @@ int luaRedisReplicateCommandsCommand(lua_State *lua) { * * However it is simpler to just call scriptingReset() that does just that. */ void scriptingInit(int setup) { - lua_State *lua = lua_open(); - if (setup) { lctx.lua_client = NULL; server.script_disable_deny_script = 0; ldbInit(); } + lua_State *lua = createLuaState(); + if (lua == NULL) { + serverLog(LL_WARNING, "Failed creating the lua VM."); + exit(1); + } + /* Initialize a dictionary we use to map SHAs to scripts. - * This is useful for replication, as we need to replicate EVALSHA - * as EVAL, so we need to remember the associated script. */ + * Initialize a list we use for lua script evictions, it shares the + * sha with the dictionary, so free fn is not set. 
*/ lctx.lua_scripts = dictCreate(&shaScriptObjectDictType); + lctx.lua_scripts_lru_list = listCreate(); lctx.lua_scripts_mem = 0; luaRegisterRedisAPI(lua); @@ -264,15 +253,27 @@ void scriptingInit(int setup) { lctx.lua = lua; } +/* Free lua_scripts dict and close lua interpreter. */ +void freeLuaScriptsSync(dict *lua_scripts, list *lua_scripts_lru_list, lua_State *lua) { + dictRelease(lua_scripts); + listRelease(lua_scripts_lru_list); + lua_close(lua); + +#if defined(USE_JEMALLOC) + /* When lua is closed, destroy the previously used private tcache. */ + void *ud = (global_State*)G(lua)->ud; + unsigned int lua_tcache = (unsigned int)(uintptr_t)ud; + je_mallctl("tcache.destroy", NULL, NULL, (void *)&lua_tcache, sizeof(unsigned int)); +#endif +} + /* Release resources related to Lua scripting. * This function is used in order to reset the scripting environment. */ void scriptingRelease(int async) { if (async) - freeLuaScriptsAsync(lctx.lua_scripts); + freeLuaScriptsAsync(lctx.lua_scripts, lctx.lua_scripts_lru_list, lctx.lua); else - dictRelease(lctx.lua_scripts); - lctx.lua_scripts_mem = 0; - lua_close(lctx.lua); + freeLuaScriptsSync(lctx.lua_scripts, lctx.lua_scripts_lru_list, lctx.lua); } void scriptingReset(int async) { @@ -418,8 +419,11 @@ uint64_t evalGetCommandFlags(client *c, uint64_t cmd_flags) { * exists, and in such a case, it behaves like in the success case. * * If 'c' is not NULL, on error the client is informed with an appropriate - * error describing the nature of the problem and the Lua interpreter error. */ -sds luaCreateFunction(client *c, robj *body) { + * error describing the nature of the problem and the Lua interpreter error. + * + * 'evalsha' indicating whether the lua function is created from the EVAL context + * or from the SCRIPT LOAD. 
*/ +sds luaCreateFunction(client *c, robj *body, int evalsha) { char funcname[43]; dictEntry *de; uint64_t script_flags; @@ -436,7 +440,9 @@ sds luaCreateFunction(client *c, robj *body) { ssize_t shebang_len = 0; sds err = NULL; if (evalExtractShebangFlags(body->ptr, &script_flags, &shebang_len, &err) == C_ERR) { - addReplyErrorSds(c, err); + if (c != NULL) { + addReplyErrorSds(c, err); + } return NULL; } @@ -462,6 +468,7 @@ sds luaCreateFunction(client *c, robj *body) { l->body = body; l->flags = script_flags; sds sha = sdsnewlen(funcname+2,40); + l->node = luaScriptsLRUAdd(c, sha, evalsha); int retval = dictAdd(lctx.lua_scripts,sha,l); serverAssertWithInfo(c ? c : lctx.lua_client,NULL,retval == DICT_OK); lctx.lua_scripts_mem += sdsZmallocSize(sha) + getStringObjectSdsUsedMemory(body); @@ -469,6 +476,63 @@ sds luaCreateFunction(client *c, robj *body) { return sha; } +/* Delete a Lua function with the specified sha. + * + * This will delete the lua function from the lua interpreter and delete + * the lua function from server. */ +void luaDeleteFunction(client *c, sds sha) { + /* Delete the script from lua interpreter. */ + char funcname[43]; + funcname[0] = 'f'; + funcname[1] = '_'; + memcpy(funcname+2, sha, 40); + funcname[42] = '\0'; + lua_pushnil(lctx.lua); + lua_setfield(lctx.lua, LUA_REGISTRYINDEX, funcname); + + /* Delete the script from server. */ + dictEntry *de = dictUnlink(lctx.lua_scripts, sha); + serverAssertWithInfo(c ? c : lctx.lua_client, NULL, de); + luaScript *l = dictGetVal(de); + /* We only delete `EVAL` scripts, which must exist in the LRU list. */ + serverAssert(l->node); + listDelNode(lctx.lua_scripts_lru_list, l->node); + lctx.lua_scripts_mem -= sdsZmallocSize(sha) + getStringObjectSdsUsedMemory(l->body); + dictFreeUnlinkedEntry(lctx.lua_scripts, de); +} + +/* Users who abuse EVAL will generate a new lua script on each call, which can + * consume large amounts of memory over time. 
Since EVAL is mostly the one that + * abuses the lua cache, and these won't have pipeline issues (scripts won't + * disappear when EVALSHA needs it and cause failure), we implement script eviction + * only for these (not for one loaded with SCRIPT LOAD). Considering that we don't + * have many scripts, then unlike keys, we don't need to worry about the memory + * usage of keeping a true sorted LRU linked list. + * + * 'evalsha' indicating whether the lua function is added from the EVAL context + * or from the SCRIPT LOAD. + * + * Returns the corresponding node added, which is used to save it in luaScript + * and use it for quick removal and re-insertion into an LRU list each time the + * script is used. */ +#define LRU_LIST_LENGTH 500 +listNode *luaScriptsLRUAdd(client *c, sds sha, int evalsha) { + /* Script eviction only applies to EVAL, not SCRIPT LOAD. */ + if (evalsha) return NULL; + + /* Evict oldest. */ + while (listLength(lctx.lua_scripts_lru_list) >= LRU_LIST_LENGTH) { + listNode *ln = listFirst(lctx.lua_scripts_lru_list); + sds oldest = listNodeValue(ln); + luaDeleteFunction(c, oldest); + server.stat_evictedscripts++; + } + + /* Add current. */ + listAddNodeTail(lctx.lua_scripts_lru_list, sha); + return listLast(lctx.lua_scripts_lru_list); +} + void evalGenericCommand(client *c, int evalsha) { lua_State *lua = lctx.lua; char funcname[43]; @@ -507,7 +571,7 @@ void evalGenericCommand(client *c, int evalsha) { addReplyErrorObject(c, shared.noscripterr); return; } - if (luaCreateFunction(c,c->argv[1]) == NULL) { + if (luaCreateFunction(c, c->argv[1], evalsha) == NULL) { lua_pop(lua,1); /* remove the error handler from the stack. */ /* The error is sent to the client by luaCreateFunction() * itself when it returns NULL. */ @@ -536,6 +600,13 @@ void evalGenericCommand(client *c, int evalsha) { luaCallFunction(&rctx, lua, c->argv+3, numkeys, c->argv+3+numkeys, c->argc-3-numkeys, ldb.active); lua_pop(lua,1); /* Remove the error handler. 
*/ scriptResetRun(&rctx); + + if (l->node) { + /* Quick removal and re-insertion after the script is called to + * maintain the LRU list. */ + listUnlinkNode(lctx.lua_scripts_lru_list, l->node); + listLinkNodeTail(lctx.lua_scripts_lru_list, l->node); + } } void evalCommand(client *c) { @@ -621,7 +692,7 @@ NULL addReply(c,shared.czero); } } else if (c->argc == 3 && !strcasecmp(c->argv[1]->ptr,"load")) { - sds sha = luaCreateFunction(c,c->argv[2]); + sds sha = luaCreateFunction(c, c->argv[2], 1); if (sha == NULL) return; /* The error was sent by luaCreateFunction(). */ addReplyBulkCBuffer(c,sha,40); } else if (c->argc == 2 && !strcasecmp(c->argv[1]->ptr,"kill")) { @@ -661,7 +732,8 @@ dict* evalScriptsDict(void) { unsigned long evalScriptsMemory(void) { return lctx.lua_scripts_mem + dictMemUsage(lctx.lua_scripts) + - dictSize(lctx.lua_scripts) * sizeof(luaScript); + dictSize(lctx.lua_scripts) * sizeof(luaScript) + + listLength(lctx.lua_scripts_lru_list) * sizeof(listNode); } /* --------------------------------------------------------------------------- @@ -1665,3 +1737,7 @@ void luaLdbLineHook(lua_State *lua, lua_Debug *ar) { rctx->start_time = getMonotonicUs(); } } + +dict *getLuaScripts(void) { + return lctx.lua_scripts; +} diff --git a/src/evict.c b/src/evict.c index 909714b4304..890a845d5df 100644 --- a/src/evict.c +++ b/src/evict.c @@ -2,32 +2,11 @@ * * ---------------------------------------------------------------------------- * - * Copyright (c) 2009-2016, Salvatore Sanfilippo + * Copyright (c) 2009-Present, Redis Ltd. * All rights reserved. * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. 
- * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Redis nor the names of its contributors may be used - * to endorse or promote products derived from this software without - * specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. + * Licensed under your choice of the Redis Source Available License 2.0 + * (RSALv2) or the Server Side Public License v1 (SSPLv1). */ #include "server.h" @@ -58,6 +37,7 @@ struct evictionPoolEntry { sds key; /* Key name. */ sds cached; /* Cached SDS object for key name. */ int dbid; /* Key DB number. */ + int slot; /* Slot. */ }; static struct evictionPoolEntry *EvictionPoolLRU; @@ -142,12 +122,12 @@ void evictionPoolAlloc(void) { * We insert keys on place in ascending order, so keys with the smaller * idle time are on the left, and keys with the higher idle time on the * right. 
*/ - -void evictionPoolPopulate(int dbid, dict *sampledict, dict *keydict, struct evictionPoolEntry *pool) { +int evictionPoolPopulate(redisDb *db, kvstore *samplekvs, struct evictionPoolEntry *pool) { int j, k, count; dictEntry *samples[server.maxmemory_samples]; - count = dictGetSomeKeys(sampledict,samples,server.maxmemory_samples); + int slot = kvstoreGetFairRandomDictIndex(samplekvs); + count = kvstoreDictGetSomeKeys(samplekvs,slot,samples,server.maxmemory_samples); for (j = 0; j < count; j++) { unsigned long long idle; sds key; @@ -161,13 +141,14 @@ void evictionPoolPopulate(int dbid, dict *sampledict, dict *keydict, struct evic * dictionary (but the expires one) we need to lookup the key * again in the key dictionary to obtain the value object. */ if (server.maxmemory_policy != MAXMEMORY_VOLATILE_TTL) { - if (sampledict != keydict) de = dictFind(keydict, key); + if (samplekvs != db->keys) + de = kvstoreDictFind(db->keys, slot, key); o = dictGetVal(de); } /* Calculate the idle time according to the policy. This is called * idle just because the code initially handled LRU, but is in fact - * just a score where an higher score means better candidate. */ + * just a score where a higher score means better candidate. 
*/ if (server.maxmemory_policy & MAXMEMORY_FLAG_LRU) { idle = estimateObjectIdleTime(o); } else if (server.maxmemory_policy & MAXMEMORY_FLAG_LFU) { @@ -236,8 +217,11 @@ void evictionPoolPopulate(int dbid, dict *sampledict, dict *keydict, struct evic pool[k].key = pool[k].cached; } pool[k].idle = idle; - pool[k].dbid = dbid; + pool[k].dbid = db->id; + pool[k].slot = slot; } + + return count; } /* ---------------------------------------------------------------------------- @@ -249,42 +233,40 @@ void evictionPoolPopulate(int dbid, dict *sampledict, dict *keydict, struct evic * * We split the 24 bits into two fields: * - * 16 bits 8 bits - * +----------------+--------+ - * + Last decr time | LOG_C | - * +----------------+--------+ + * 16 bits 8 bits + * +------------------+--------+ + * + Last access time | LOG_C | + * +------------------+--------+ * * LOG_C is a logarithmic counter that provides an indication of the access * frequency. However this field must also be decremented otherwise what used * to be a frequently accessed key in the past, will remain ranked like that * forever, while we want the algorithm to adapt to access pattern changes. * - * So the remaining 16 bits are used in order to store the "decrement time", + * So the remaining 16 bits are used in order to store the "access time", * a reduced-precision Unix time (we take 16 bits of the time converted * in minutes since we don't care about wrapping around) where the LOG_C - * counter is halved if it has an high value, or just decremented if it - * has a low value. + * counter decays every minute by default (depends on lfu-decay-time). * * New keys don't start at zero, in order to have the ability to collect * some accesses before being trashed away, so they start at LFU_INIT_VAL. 
* The logarithmic increment performed on LOG_C takes care of LFU_INIT_VAL * when incrementing the key, so that keys starting at LFU_INIT_VAL * (or having a smaller value) have a very high chance of being incremented - * on access. + * on access. (The chance depends on counter and lfu-log-factor.) * - * During decrement, the value of the logarithmic counter is halved if - * its current value is greater than two times the LFU_INIT_VAL, otherwise - * it is just decremented by one. + * During decrement, the value of the logarithmic counter is decremented by + * one when lfu-decay-time minutes elapsed. * --------------------------------------------------------------------------*/ /* Return the current time in minutes, just taking the least significant - * 16 bits. The returned time is suitable to be stored as LDT (last decrement + * 16 bits. The returned time is suitable to be stored as LDT (last access * time) for the LFU implementation. */ unsigned long LFUGetTimeInMinutes(void) { return (server.unixtime/60) & 65535; } -/* Given an object last access time, compute the minimum number of minutes +/* Given an object ldt (last access time), compute the minimum number of minutes * that elapsed since the last access. Handle overflow (ldt greater than * the current 16 bits minutes time) considering the time as wrapping * exactly once. */ @@ -306,10 +288,10 @@ uint8_t LFULogIncr(uint8_t counter) { return counter; } -/* If the object decrement time is reached decrement the LFU counter but +/* If the object's ldt (last access time) is reached, decrement the LFU counter but * do not update LFU fields of the object, we update the access time * and counter in an explicit way when the object is really accessed. - * And we will times halve the counter according to the times of + * And we will decrement the counter according to the times of * elapsed time than server.lfu_decay_time. * Return the object frequency counter. 
* @@ -569,6 +551,7 @@ int performEvictions(void) { /* Try to smoke-out bugs (server.also_propagate should be empty here) */ serverAssert(server.also_propagate.numops == 0); + /* Evictions are performed on random keys that have nothing to do with the current command slot. */ while (mem_freed < (long long)mem_tofree) { int j, k, i; @@ -576,27 +559,43 @@ int performEvictions(void) { sds bestkey = NULL; int bestdbid; redisDb *db; - dict *dict; dictEntry *de; if (server.maxmemory_policy & (MAXMEMORY_FLAG_LRU|MAXMEMORY_FLAG_LFU) || server.maxmemory_policy == MAXMEMORY_VOLATILE_TTL) { struct evictionPoolEntry *pool = EvictionPoolLRU; - while (bestkey == NULL) { - unsigned long total_keys = 0, keys; + unsigned long total_keys = 0; /* We don't want to make local-db choices when expiring keys, * so to start populate the eviction pool sampling keys from * every DB. */ for (i = 0; i < server.dbnum; i++) { db = server.db+i; - dict = (server.maxmemory_policy & MAXMEMORY_FLAG_ALLKEYS) ? - db->dict : db->expires; - if ((keys = dictSize(dict)) != 0) { - evictionPoolPopulate(i, dict, db->dict, pool); - total_keys += keys; + kvstore *kvs; + if (server.maxmemory_policy & MAXMEMORY_FLAG_ALLKEYS) { + kvs = db->keys; + } else { + kvs = db->expires; + } + unsigned long sampled_keys = 0; + unsigned long current_db_keys = kvstoreSize(kvs); + if (current_db_keys == 0) continue; + + total_keys += current_db_keys; + int l = kvstoreNumNonEmptyDicts(kvs); + /* Do not exceed the number of non-empty slots when looping. */ + while (l--) { + sampled_keys += evictionPoolPopulate(db, kvs, pool); + /* We have sampled enough keys in the current db, exit the loop. */ + if (sampled_keys >= (unsigned long) server.maxmemory_samples) + break; + /* If there are not a lot of keys in the current db, dict/s may be very + * sparsely populated, exit the loop without meeting the sampling + * requirement. 
*/ + if (current_db_keys < (unsigned long) server.maxmemory_samples*10) + break; } } if (!total_keys) break; /* No keys to evict. */ @@ -606,13 +605,13 @@ int performEvictions(void) { if (pool[k].key == NULL) continue; bestdbid = pool[k].dbid; + kvstore *kvs; if (server.maxmemory_policy & MAXMEMORY_FLAG_ALLKEYS) { - de = dictFind(server.db[bestdbid].dict, - pool[k].key); + kvs = server.db[bestdbid].keys; } else { - de = dictFind(server.db[bestdbid].expires, - pool[k].key); + kvs = server.db[bestdbid].expires; } + de = kvstoreDictFind(kvs, pool[k].slot, pool[k].key); /* Remove the entry from the pool. */ if (pool[k].key != pool[k].cached) @@ -642,10 +641,15 @@ int performEvictions(void) { for (i = 0; i < server.dbnum; i++) { j = (++next_db) % server.dbnum; db = server.db+j; - dict = (server.maxmemory_policy == MAXMEMORY_ALLKEYS_RANDOM) ? - db->dict : db->expires; - if (dictSize(dict) != 0) { - de = dictGetRandomKey(dict); + kvstore *kvs; + if (server.maxmemory_policy == MAXMEMORY_ALLKEYS_RANDOM) { + kvs = db->keys; + } else { + kvs = db->expires; + } + int slot = kvstoreGetFairRandomDictIndex(kvs); + de = kvstoreDictGetRandomKey(kvs, slot); + if (de) { bestkey = dictGetKey(de); bestdbid = j; break; @@ -667,6 +671,7 @@ int performEvictions(void) { * * AOF and Output buffer memory will be freed eventually so * we only care about memory used by the key space. 
*/ + enterExecutionUnit(1, 0); delta = (long long) zmalloc_used_memory(); latencyStartMonitor(eviction_latency); dbGenericDelete(db,keyobj,server.lazyfree_lazy_eviction,DB_FLAG_KEY_EVICTED); @@ -679,6 +684,7 @@ int performEvictions(void) { notifyKeyspaceEvent(NOTIFY_EVICTED, "evicted", keyobj, db->id); propagateDeletion(db,keyobj,server.lazyfree_lazy_eviction); + exitExecutionUnit(); postExecutionUnitOperations(); decrRefCount(keyobj); keys_freed++; diff --git a/src/expire.c b/src/expire.c index 425491af6bc..646f752a9c4 100644 --- a/src/expire.c +++ b/src/expire.c @@ -2,32 +2,11 @@ * * ---------------------------------------------------------------------------- * - * Copyright (c) 2009-2016, Salvatore Sanfilippo + * Copyright (c) 2009-Present, Redis Ltd. * All rights reserved. * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Redis nor the names of its contributors may be used - * to endorse or promote products derived from this software without - * specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. + * Licensed under your choice of the Redis Source Available License 2.0 + * (RSALv2) or the Server Side Public License v1 (SSPLv1). */ #include "server.h" @@ -40,6 +19,10 @@ * if no access is performed on them. *----------------------------------------------------------------------------*/ +/* Constants table from pow(0.98, 1) to pow(0.98, 16). + * Help calculating the db->avg_ttl. */ +static double avg_ttl_factor[16] = {0.98, 0.9604, 0.941192, 0.922368, 0.903921, 0.885842, 0.868126, 0.850763, 0.833748, 0.817073, 0.800731, 0.784717, 0.769022, 0.753642, 0.738569, 0.723798}; + /* Helper function for the activeExpireCycle() function. * This function will try to expire the key that is stored in the hash table * entry 'de' of the 'expires' hash table of a Redis database. @@ -54,10 +37,12 @@ int activeExpireCycleTryExpire(redisDb *db, dictEntry *de, long long now) { long long t = dictGetSignedIntegerVal(de); if (now > t) { + enterExecutionUnit(1, 0); sds key = dictGetKey(de); robj *keyobj = createStringObject(key,sdslen(key)); deleteExpiredKeyAndPropagate(db,keyobj); decrRefCount(keyobj); + exitExecutionUnit(); return 1; } else { return 0; @@ -109,6 +94,7 @@ int activeExpireCycleTryExpire(redisDb *db, dictEntry *de, long long now) { #define ACTIVE_EXPIRE_CYCLE_SLOW_TIME_PERC 25 /* Max % of CPU to use. */ #define ACTIVE_EXPIRE_CYCLE_ACCEPTABLE_STALE 10 /* % of stale keys after which we do extra efforts. 
*/ +#define HFE_ACTIVE_EXPIRE_CYCLE_FIELDS 1000 /* Data used by the expire dict scan callback. */ typedef struct { @@ -137,6 +123,65 @@ void expireScanCallback(void *privdata, const dictEntry *const_de) { data->sampled++; } +static inline int isExpiryDictValidForSamplingCb(dict *d) { + long long numkeys = dictSize(d); + unsigned long buckets = dictBuckets(d); + /* When there are less than 1% filled buckets, sampling the key + * space is expensive, so stop here waiting for better times... + * The dictionary will be resized asap. */ + if (buckets > DICT_HT_INITIAL_SIZE && (numkeys * 100/buckets < 1)) { + return C_ERR; + } + return C_OK; +} + +/* Active expiration Cycle for hash-fields. + * + * Note that releasing fields is expected to be more predictable and rewarding + * than releasing keys because it is stored in `ebuckets` DS which optimized for + * active expiration and in addition the deletion of fields is simple to handle. */ +static inline void activeExpireHashFieldCycle(int type) { + /* Remember current db across calls */ + static unsigned int currentDb = 0; + + /* Tracks the count of fields actively expired for the current database. + * This count continues as long as it fails to actively expire all expired + * fields of currentDb, indicating a possible need to adjust the value of + * maxToExpire. */ + static uint64_t activeExpirySequence = 0; + /* Threshold for adjusting maxToExpire */ + const uint32_t EXPIRED_FIELDS_TH = 1000000; + /* Maximum number of fields to actively expire in a single call */ + uint32_t maxToExpire = HFE_ACTIVE_EXPIRE_CYCLE_FIELDS; + + redisDb *db = server.db + currentDb; + + /* If db is empty, move to next db and return */ + if (ebIsEmpty(db->hexpires)) { + activeExpirySequence = 0; + currentDb = (currentDb + 1) % server.dbnum; + return; + } + + /* If running for a while and didn't manage to active-expire all expired fields of + * currentDb (i.e. 
activeExpirySequence becomes significant) then adjust maxToExpire */ + if ((activeExpirySequence > EXPIRED_FIELDS_TH) && (type == ACTIVE_EXPIRE_CYCLE_SLOW)) { + /* maxToExpire is multiplied by a factor between 1 and 32, proportional to + * the number of times activeExpirySequence exceeded EXPIRED_FIELDS_TH */ + uint64_t factor = activeExpirySequence / EXPIRED_FIELDS_TH; + maxToExpire *= (factor<32) ? factor : 32; + } + + if (hashTypeDbActiveExpire(db, maxToExpire) == maxToExpire) { + /* active-expire reached maxToExpire limit */ + activeExpirySequence += maxToExpire; + } else { + /* Managed to active-expire all expired fields of currentDb */ + activeExpirySequence = 0; + currentDb = (currentDb + 1) % server.dbnum; + } +} + void activeExpireCycle(int type) { /* Adjust the running parameters according to the configured expire * effort. The default effort is 1, and the maximum configurable effort @@ -160,6 +205,7 @@ void activeExpireCycle(int type) { int j, iteration = 0; int dbs_per_call = CRON_DBS_PER_CALL; + int dbs_performed = 0; long long start = ustime(), timelimit, elapsed; /* If 'expire' action is paused, for whatever reason, then don't expire any key. @@ -212,46 +258,55 @@ void activeExpireCycle(int type) { /* Try to smoke-out bugs (server.also_propagate should be empty here) */ serverAssert(server.also_propagate.numops == 0); - for (j = 0; j < dbs_per_call && timelimit_exit == 0; j++) { + /* Stop iteration when one of the following conditions is met: + * + * 1) We have checked a sufficient number of databases with expiration time. + * 2) The time limit has been exceeded. + * 3) All databases have been traversed. */ + for (j = 0; dbs_performed < dbs_per_call && timelimit_exit == 0 && j < server.dbnum; j++) { /* Scan callback data including expired and checked count per iteration. 
*/ expireScanData data; + data.ttl_sum = 0; + data.ttl_samples = 0; redisDb *db = server.db+(current_db % server.dbnum); data.db = db; + int db_done = 0; /* The scan of the current DB is done? */ + int update_avg_ttl_times = 0, repeat = 0; + /* Increment the DB now so we are sure if we run out of time * in the current DB we'll restart from the next. This allows to * distribute the time evenly across DBs. */ current_db++; + /* Interleaving hash-field expiration with key expiration. Better + * call it before handling expired keys because HFE DS is optimized for + * active expiration */ + activeExpireHashFieldCycle(type); + + if (kvstoreSize(db->expires)) + dbs_performed++; + /* Continue to expire if at the end of the cycle there are still * a big percentage of keys to expire, compared to the number of keys * we scanned. The percentage, stored in config_cycle_acceptable_stale * is not fixed, but depends on the Redis configured "expire effort". */ do { - unsigned long num, slots; + unsigned long num; iteration++; /* If there is nothing to expire try next DB ASAP. */ - if ((num = dictSize(db->expires)) == 0) { + if ((num = kvstoreSize(db->expires)) == 0) { db->avg_ttl = 0; break; } - slots = dictSlots(db->expires); data.now = mstime(); - /* When there are less than 1% filled slots, sampling the key - * space is expensive, so stop here waiting for better times... - * The dictionary will be resized asap. */ - if (slots > DICT_HT_INITIAL_SIZE && - (num*100/slots < 1)) break; - /* The main collection cycle. Scan through keys among keys * with an expire set, checking for expired ones. 
*/ data.sampled = 0; data.expired = 0; - data.ttl_sum = 0; - data.ttl_samples = 0; if (num > config_keys_per_loop) num = config_keys_per_loop; @@ -269,41 +324,70 @@ void activeExpireCycle(int type) { long max_buckets = num*20; long checked_buckets = 0; + int origin_ttl_samples = data.ttl_samples; + while (data.sampled < num && checked_buckets < max_buckets) { - db->expires_cursor = dictScan(db->expires, db->expires_cursor, - expireScanCallback, &data); + db->expires_cursor = kvstoreScan(db->expires, db->expires_cursor, -1, expireScanCallback, isExpiryDictValidForSamplingCb, &data); + if (db->expires_cursor == 0) { + db_done = 1; + break; + } checked_buckets++; } total_expired += data.expired; total_sampled += data.sampled; - /* Update the average TTL stats for this database. */ - if (data.ttl_samples) { - long long avg_ttl = data.ttl_sum / data.ttl_samples; + /* If find keys with ttl not yet expired, we need to update the average TTL stats once. */ + if (data.ttl_samples - origin_ttl_samples > 0) update_avg_ttl_times++; - /* Do a simple running average with a few samples. - * We just use the current estimate with a weight of 2% - * and the previous estimate with a weight of 98%. */ - if (db->avg_ttl == 0) db->avg_ttl = avg_ttl; - db->avg_ttl = (db->avg_ttl/50)*49 + (avg_ttl/50); - } + /* We don't repeat the cycle for the current database if the db is done + * for scanning or an acceptable number of stale keys (logically expired + * but yet not reclaimed). */ + repeat = db_done ? 0 : (data.sampled == 0 || (data.expired * 100 / data.sampled) > config_cycle_acceptable_stale); /* We can't block forever here even if there are many keys to - * expire. So after a given amount of milliseconds return to the + * expire. So after a given amount of microseconds return to the * caller waiting for the other active expire cycle. */ - if ((iteration & 0xf) == 0) { /* check once every 16 iterations. 
*/ - elapsed = ustime()-start; - if (elapsed > timelimit) { - timelimit_exit = 1; - server.stat_expired_time_cap_reached_count++; - break; + if ((iteration & 0xf) == 0 || !repeat) { /* Update the average TTL stats every 16 iterations or about to exit. */ + /* Update the average TTL stats for this database, + * because this may reach the time limit. */ + if (data.ttl_samples) { + long long avg_ttl = data.ttl_sum / data.ttl_samples; + + /* Do a simple running average with a few samples. + * We just use the current estimate with a weight of 2% + * and the previous estimate with a weight of 98%. */ + if (db->avg_ttl == 0) { + db->avg_ttl = avg_ttl; + } else { + /* The origin code is as follow. + * for (int i = 0; i < update_avg_ttl_times; i++) { + * db->avg_ttl = (db->avg_ttl/50)*49 + (avg_ttl/50); + * } + * We can convert the loop into a sum of a geometric progression. + * db->avg_ttl = db->avg_ttl * pow(0.98, update_avg_ttl_times) + + * avg_ttl / 50 * (pow(0.98, update_avg_ttl_times - 1) + ... + 1) + * = db->avg_ttl * pow(0.98, update_avg_ttl_times) + + * avg_ttl * (1 - pow(0.98, update_avg_ttl_times)) + * = avg_ttl + (db->avg_ttl - avg_ttl) * pow(0.98, update_avg_ttl_times) + * Notice that update_avg_ttl_times is between 1 and 16, we use a constant table + * to accelerate the calculation of pow(0.98, update_avg_ttl_times).*/ + db->avg_ttl = avg_ttl + (db->avg_ttl - avg_ttl) * avg_ttl_factor[update_avg_ttl_times - 1] ; + } + update_avg_ttl_times = 0; + data.ttl_sum = 0; + data.ttl_samples = 0; + } + if ((iteration & 0xf) == 0) { /* check time limit every 16 iterations. */ + elapsed = ustime()-start; + if (elapsed > timelimit) { + timelimit_exit = 1; + server.stat_expired_time_cap_reached_count++; + break; + } } } - /* We don't repeat the cycle for the current database if there are - * an acceptable amount of stale keys (logically expired but yet - * not reclaimed). 
*/ - } while (data.sampled == 0 || - (data.expired * 100 / data.sampled) > config_cycle_acceptable_stale); + } while (repeat); } elapsed = ustime()-start; @@ -378,7 +462,7 @@ void expireSlaveKeys(void) { while(dbids && dbid < server.dbnum) { if ((dbids & 1) != 0) { redisDb *db = server.db+dbid; - dictEntry *expire = dictFind(db->expires,keyname); + dictEntry *expire = dbFindExpires(db, keyname); int expired = 0; if (expire && diff --git a/src/fmacros.h b/src/fmacros.h index c5da4b7345a..92791cbcec7 100644 --- a/src/fmacros.h +++ b/src/fmacros.h @@ -1,30 +1,9 @@ /* - * Copyright (c) 2009-2012, Salvatore Sanfilippo + * Copyright (c) 2009-Present, Redis Ltd. * All rights reserved. * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Redis nor the names of its contributors may be used - * to endorse or promote products derived from this software without - * specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. + * Licensed under your choice of the Redis Source Available License 2.0 + * (RSALv2) or the Server Side Public License v1 (SSPLv1). */ #ifndef _REDIS_FMACRO_H diff --git a/src/fmtargs.h b/src/fmtargs.h new file mode 100644 index 00000000000..e52d3b99c50 --- /dev/null +++ b/src/fmtargs.h @@ -0,0 +1,173 @@ +/* + * Copyright Redis Contributors. + * All rights reserved. + * SPDX-License-Identifier: BSD 3-Clause + * + * To make it easier to map each part of the format string with each argument, + * this file provides a way to write + * + * printf("a = %s, b = %s, c = %s\n", + * arg1, arg2, arg3); + * + * as + * + * printf(FMTARGS("a = %s, ", arg1, + * "b = %s, ", arg2, + * "c = %s\n", arg3)); + * + * FMTARGS is variadic macro which is implemented by passing on its arguments to + * two other variadic macros of which one extracts the odd (the formats) and the + * other extracts the even (the arguments). The definitions of these macros + * include counting the number of macro arguments. Therefore, they don't accept + * an unlimited number of arguments. Currently it is fixed to a maximum of 120 + * formats and arguments. + */ +#ifndef FMTARGS_H +#define FMTARGS_H + +/* A macro to count the number of arguments. */ +#define NARG(...) NARG_I(__VA_ARGS__,RSEQ_N()) +#define NARG_I(...) ARG_N(__VA_ARGS__) + +/* Define a macro which will call an arbitrary macro appended with a number indicating + * the number of arguments it has. 
*/ +#define VFUNC_N_(name, n) name##n +#define VFUNC_N(name, n) VFUNC_N_(name, n) +#define VFUNC(func, ...) VFUNC_N(func, NARG(__VA_ARGS__)) (__VA_ARGS__) + +/* Macros to extract the formats and the arguments from the fmt-arg pairs and + * then combine them again with all formats first and the arguments last. */ +#define COMPACT_FMT(...) VFUNC(COMPACT_FMT_, __VA_ARGS__) +#define COMPACT_VALUES(...) VFUNC(COMPACT_VALUES_, __VA_ARGS__) +#define FMTARGS(...) COMPACT_FMT(__VA_ARGS__), COMPACT_VALUES(__VA_ARGS__) + +/* Everything below this line is automatically generated by + * generate-fmtargs.py. Do not manually edit. */ + +#define ARG_N(_1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _13, _14, _15, _16, _17, _18, _19, _20, _21, _22, _23, _24, _25, _26, _27, _28, _29, _30, _31, _32, _33, _34, _35, _36, _37, _38, _39, _40, _41, _42, _43, _44, _45, _46, _47, _48, _49, _50, _51, _52, _53, _54, _55, _56, _57, _58, _59, _60, _61, _62, _63, _64, _65, _66, _67, _68, _69, _70, _71, _72, _73, _74, _75, _76, _77, _78, _79, _80, _81, _82, _83, _84, _85, _86, _87, _88, _89, _90, _91, _92, _93, _94, _95, _96, _97, _98, _99, _100, _101, _102, _103, _104, _105, _106, _107, _108, _109, _110, _111, _112, _113, _114, _115, _116, _117, _118, _119, _120, N, ...) N + +#define RSEQ_N() 120, 119, 118, 117, 116, 115, 114, 113, 112, 111, 110, 109, 108, 107, 106, 105, 104, 103, 102, 101, 100, 99, 98, 97, 96, 95, 94, 93, 92, 91, 90, 89, 88, 87, 86, 85, 84, 83, 82, 81, 80, 79, 78, 77, 76, 75, 74, 73, 72, 71, 70, 69, 68, 67, 66, 65, 64, 63, 62, 61, 60, 59, 58, 57, 56, 55, 54, 53, 52, 51, 50, 49, 48, 47, 46, 45, 44, 43, 42, 41, 40, 39, 38, 37, 36, 35, 34, 33, 32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 + +#define COMPACT_FMT_2(fmt, value) fmt +#define COMPACT_FMT_4(fmt, value, ...) fmt COMPACT_FMT_2(__VA_ARGS__) +#define COMPACT_FMT_6(fmt, value, ...) 
fmt COMPACT_FMT_4(__VA_ARGS__) +#define COMPACT_FMT_8(fmt, value, ...) fmt COMPACT_FMT_6(__VA_ARGS__) +#define COMPACT_FMT_10(fmt, value, ...) fmt COMPACT_FMT_8(__VA_ARGS__) +#define COMPACT_FMT_12(fmt, value, ...) fmt COMPACT_FMT_10(__VA_ARGS__) +#define COMPACT_FMT_14(fmt, value, ...) fmt COMPACT_FMT_12(__VA_ARGS__) +#define COMPACT_FMT_16(fmt, value, ...) fmt COMPACT_FMT_14(__VA_ARGS__) +#define COMPACT_FMT_18(fmt, value, ...) fmt COMPACT_FMT_16(__VA_ARGS__) +#define COMPACT_FMT_20(fmt, value, ...) fmt COMPACT_FMT_18(__VA_ARGS__) +#define COMPACT_FMT_22(fmt, value, ...) fmt COMPACT_FMT_20(__VA_ARGS__) +#define COMPACT_FMT_24(fmt, value, ...) fmt COMPACT_FMT_22(__VA_ARGS__) +#define COMPACT_FMT_26(fmt, value, ...) fmt COMPACT_FMT_24(__VA_ARGS__) +#define COMPACT_FMT_28(fmt, value, ...) fmt COMPACT_FMT_26(__VA_ARGS__) +#define COMPACT_FMT_30(fmt, value, ...) fmt COMPACT_FMT_28(__VA_ARGS__) +#define COMPACT_FMT_32(fmt, value, ...) fmt COMPACT_FMT_30(__VA_ARGS__) +#define COMPACT_FMT_34(fmt, value, ...) fmt COMPACT_FMT_32(__VA_ARGS__) +#define COMPACT_FMT_36(fmt, value, ...) fmt COMPACT_FMT_34(__VA_ARGS__) +#define COMPACT_FMT_38(fmt, value, ...) fmt COMPACT_FMT_36(__VA_ARGS__) +#define COMPACT_FMT_40(fmt, value, ...) fmt COMPACT_FMT_38(__VA_ARGS__) +#define COMPACT_FMT_42(fmt, value, ...) fmt COMPACT_FMT_40(__VA_ARGS__) +#define COMPACT_FMT_44(fmt, value, ...) fmt COMPACT_FMT_42(__VA_ARGS__) +#define COMPACT_FMT_46(fmt, value, ...) fmt COMPACT_FMT_44(__VA_ARGS__) +#define COMPACT_FMT_48(fmt, value, ...) fmt COMPACT_FMT_46(__VA_ARGS__) +#define COMPACT_FMT_50(fmt, value, ...) fmt COMPACT_FMT_48(__VA_ARGS__) +#define COMPACT_FMT_52(fmt, value, ...) fmt COMPACT_FMT_50(__VA_ARGS__) +#define COMPACT_FMT_54(fmt, value, ...) fmt COMPACT_FMT_52(__VA_ARGS__) +#define COMPACT_FMT_56(fmt, value, ...) fmt COMPACT_FMT_54(__VA_ARGS__) +#define COMPACT_FMT_58(fmt, value, ...) fmt COMPACT_FMT_56(__VA_ARGS__) +#define COMPACT_FMT_60(fmt, value, ...) 
fmt COMPACT_FMT_58(__VA_ARGS__) +#define COMPACT_FMT_62(fmt, value, ...) fmt COMPACT_FMT_60(__VA_ARGS__) +#define COMPACT_FMT_64(fmt, value, ...) fmt COMPACT_FMT_62(__VA_ARGS__) +#define COMPACT_FMT_66(fmt, value, ...) fmt COMPACT_FMT_64(__VA_ARGS__) +#define COMPACT_FMT_68(fmt, value, ...) fmt COMPACT_FMT_66(__VA_ARGS__) +#define COMPACT_FMT_70(fmt, value, ...) fmt COMPACT_FMT_68(__VA_ARGS__) +#define COMPACT_FMT_72(fmt, value, ...) fmt COMPACT_FMT_70(__VA_ARGS__) +#define COMPACT_FMT_74(fmt, value, ...) fmt COMPACT_FMT_72(__VA_ARGS__) +#define COMPACT_FMT_76(fmt, value, ...) fmt COMPACT_FMT_74(__VA_ARGS__) +#define COMPACT_FMT_78(fmt, value, ...) fmt COMPACT_FMT_76(__VA_ARGS__) +#define COMPACT_FMT_80(fmt, value, ...) fmt COMPACT_FMT_78(__VA_ARGS__) +#define COMPACT_FMT_82(fmt, value, ...) fmt COMPACT_FMT_80(__VA_ARGS__) +#define COMPACT_FMT_84(fmt, value, ...) fmt COMPACT_FMT_82(__VA_ARGS__) +#define COMPACT_FMT_86(fmt, value, ...) fmt COMPACT_FMT_84(__VA_ARGS__) +#define COMPACT_FMT_88(fmt, value, ...) fmt COMPACT_FMT_86(__VA_ARGS__) +#define COMPACT_FMT_90(fmt, value, ...) fmt COMPACT_FMT_88(__VA_ARGS__) +#define COMPACT_FMT_92(fmt, value, ...) fmt COMPACT_FMT_90(__VA_ARGS__) +#define COMPACT_FMT_94(fmt, value, ...) fmt COMPACT_FMT_92(__VA_ARGS__) +#define COMPACT_FMT_96(fmt, value, ...) fmt COMPACT_FMT_94(__VA_ARGS__) +#define COMPACT_FMT_98(fmt, value, ...) fmt COMPACT_FMT_96(__VA_ARGS__) +#define COMPACT_FMT_100(fmt, value, ...) fmt COMPACT_FMT_98(__VA_ARGS__) +#define COMPACT_FMT_102(fmt, value, ...) fmt COMPACT_FMT_100(__VA_ARGS__) +#define COMPACT_FMT_104(fmt, value, ...) fmt COMPACT_FMT_102(__VA_ARGS__) +#define COMPACT_FMT_106(fmt, value, ...) fmt COMPACT_FMT_104(__VA_ARGS__) +#define COMPACT_FMT_108(fmt, value, ...) fmt COMPACT_FMT_106(__VA_ARGS__) +#define COMPACT_FMT_110(fmt, value, ...) fmt COMPACT_FMT_108(__VA_ARGS__) +#define COMPACT_FMT_112(fmt, value, ...) fmt COMPACT_FMT_110(__VA_ARGS__) +#define COMPACT_FMT_114(fmt, value, ...) 
fmt COMPACT_FMT_112(__VA_ARGS__) +#define COMPACT_FMT_116(fmt, value, ...) fmt COMPACT_FMT_114(__VA_ARGS__) +#define COMPACT_FMT_118(fmt, value, ...) fmt COMPACT_FMT_116(__VA_ARGS__) +#define COMPACT_FMT_120(fmt, value, ...) fmt COMPACT_FMT_118(__VA_ARGS__) + +#define COMPACT_VALUES_2(fmt, value) value +#define COMPACT_VALUES_4(fmt, value, ...) value, COMPACT_VALUES_2(__VA_ARGS__) +#define COMPACT_VALUES_6(fmt, value, ...) value, COMPACT_VALUES_4(__VA_ARGS__) +#define COMPACT_VALUES_8(fmt, value, ...) value, COMPACT_VALUES_6(__VA_ARGS__) +#define COMPACT_VALUES_10(fmt, value, ...) value, COMPACT_VALUES_8(__VA_ARGS__) +#define COMPACT_VALUES_12(fmt, value, ...) value, COMPACT_VALUES_10(__VA_ARGS__) +#define COMPACT_VALUES_14(fmt, value, ...) value, COMPACT_VALUES_12(__VA_ARGS__) +#define COMPACT_VALUES_16(fmt, value, ...) value, COMPACT_VALUES_14(__VA_ARGS__) +#define COMPACT_VALUES_18(fmt, value, ...) value, COMPACT_VALUES_16(__VA_ARGS__) +#define COMPACT_VALUES_20(fmt, value, ...) value, COMPACT_VALUES_18(__VA_ARGS__) +#define COMPACT_VALUES_22(fmt, value, ...) value, COMPACT_VALUES_20(__VA_ARGS__) +#define COMPACT_VALUES_24(fmt, value, ...) value, COMPACT_VALUES_22(__VA_ARGS__) +#define COMPACT_VALUES_26(fmt, value, ...) value, COMPACT_VALUES_24(__VA_ARGS__) +#define COMPACT_VALUES_28(fmt, value, ...) value, COMPACT_VALUES_26(__VA_ARGS__) +#define COMPACT_VALUES_30(fmt, value, ...) value, COMPACT_VALUES_28(__VA_ARGS__) +#define COMPACT_VALUES_32(fmt, value, ...) value, COMPACT_VALUES_30(__VA_ARGS__) +#define COMPACT_VALUES_34(fmt, value, ...) value, COMPACT_VALUES_32(__VA_ARGS__) +#define COMPACT_VALUES_36(fmt, value, ...) value, COMPACT_VALUES_34(__VA_ARGS__) +#define COMPACT_VALUES_38(fmt, value, ...) value, COMPACT_VALUES_36(__VA_ARGS__) +#define COMPACT_VALUES_40(fmt, value, ...) value, COMPACT_VALUES_38(__VA_ARGS__) +#define COMPACT_VALUES_42(fmt, value, ...) value, COMPACT_VALUES_40(__VA_ARGS__) +#define COMPACT_VALUES_44(fmt, value, ...) 
value, COMPACT_VALUES_42(__VA_ARGS__) +#define COMPACT_VALUES_46(fmt, value, ...) value, COMPACT_VALUES_44(__VA_ARGS__) +#define COMPACT_VALUES_48(fmt, value, ...) value, COMPACT_VALUES_46(__VA_ARGS__) +#define COMPACT_VALUES_50(fmt, value, ...) value, COMPACT_VALUES_48(__VA_ARGS__) +#define COMPACT_VALUES_52(fmt, value, ...) value, COMPACT_VALUES_50(__VA_ARGS__) +#define COMPACT_VALUES_54(fmt, value, ...) value, COMPACT_VALUES_52(__VA_ARGS__) +#define COMPACT_VALUES_56(fmt, value, ...) value, COMPACT_VALUES_54(__VA_ARGS__) +#define COMPACT_VALUES_58(fmt, value, ...) value, COMPACT_VALUES_56(__VA_ARGS__) +#define COMPACT_VALUES_60(fmt, value, ...) value, COMPACT_VALUES_58(__VA_ARGS__) +#define COMPACT_VALUES_62(fmt, value, ...) value, COMPACT_VALUES_60(__VA_ARGS__) +#define COMPACT_VALUES_64(fmt, value, ...) value, COMPACT_VALUES_62(__VA_ARGS__) +#define COMPACT_VALUES_66(fmt, value, ...) value, COMPACT_VALUES_64(__VA_ARGS__) +#define COMPACT_VALUES_68(fmt, value, ...) value, COMPACT_VALUES_66(__VA_ARGS__) +#define COMPACT_VALUES_70(fmt, value, ...) value, COMPACT_VALUES_68(__VA_ARGS__) +#define COMPACT_VALUES_72(fmt, value, ...) value, COMPACT_VALUES_70(__VA_ARGS__) +#define COMPACT_VALUES_74(fmt, value, ...) value, COMPACT_VALUES_72(__VA_ARGS__) +#define COMPACT_VALUES_76(fmt, value, ...) value, COMPACT_VALUES_74(__VA_ARGS__) +#define COMPACT_VALUES_78(fmt, value, ...) value, COMPACT_VALUES_76(__VA_ARGS__) +#define COMPACT_VALUES_80(fmt, value, ...) value, COMPACT_VALUES_78(__VA_ARGS__) +#define COMPACT_VALUES_82(fmt, value, ...) value, COMPACT_VALUES_80(__VA_ARGS__) +#define COMPACT_VALUES_84(fmt, value, ...) value, COMPACT_VALUES_82(__VA_ARGS__) +#define COMPACT_VALUES_86(fmt, value, ...) value, COMPACT_VALUES_84(__VA_ARGS__) +#define COMPACT_VALUES_88(fmt, value, ...) value, COMPACT_VALUES_86(__VA_ARGS__) +#define COMPACT_VALUES_90(fmt, value, ...) value, COMPACT_VALUES_88(__VA_ARGS__) +#define COMPACT_VALUES_92(fmt, value, ...) 
value, COMPACT_VALUES_90(__VA_ARGS__) +#define COMPACT_VALUES_94(fmt, value, ...) value, COMPACT_VALUES_92(__VA_ARGS__) +#define COMPACT_VALUES_96(fmt, value, ...) value, COMPACT_VALUES_94(__VA_ARGS__) +#define COMPACT_VALUES_98(fmt, value, ...) value, COMPACT_VALUES_96(__VA_ARGS__) +#define COMPACT_VALUES_100(fmt, value, ...) value, COMPACT_VALUES_98(__VA_ARGS__) +#define COMPACT_VALUES_102(fmt, value, ...) value, COMPACT_VALUES_100(__VA_ARGS__) +#define COMPACT_VALUES_104(fmt, value, ...) value, COMPACT_VALUES_102(__VA_ARGS__) +#define COMPACT_VALUES_106(fmt, value, ...) value, COMPACT_VALUES_104(__VA_ARGS__) +#define COMPACT_VALUES_108(fmt, value, ...) value, COMPACT_VALUES_106(__VA_ARGS__) +#define COMPACT_VALUES_110(fmt, value, ...) value, COMPACT_VALUES_108(__VA_ARGS__) +#define COMPACT_VALUES_112(fmt, value, ...) value, COMPACT_VALUES_110(__VA_ARGS__) +#define COMPACT_VALUES_114(fmt, value, ...) value, COMPACT_VALUES_112(__VA_ARGS__) +#define COMPACT_VALUES_116(fmt, value, ...) value, COMPACT_VALUES_114(__VA_ARGS__) +#define COMPACT_VALUES_118(fmt, value, ...) value, COMPACT_VALUES_116(__VA_ARGS__) +#define COMPACT_VALUES_120(fmt, value, ...) value, COMPACT_VALUES_118(__VA_ARGS__) + +#endif diff --git a/src/function_lua.c b/src/function_lua.c index 91bb5cd67a1..61a20a4c62e 100644 --- a/src/function_lua.c +++ b/src/function_lua.c @@ -1,30 +1,9 @@ /* - * Copyright (c) 2021, Redis Ltd. + * Copyright (c) 2021-Present, Redis Ltd. * All rights reserved. * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. 
- * * Neither the name of Redis nor the names of its contributors may be used - * to endorse or promote products derived from this software without - * specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. + * Licensed under your choice of the Redis Source Available License 2.0 + * (RSALv2) or the Server Side Public License v1 (SSPLv1). 
*/ /* @@ -51,7 +30,6 @@ #define REGISTRY_LOAD_CTX_NAME "__LIBRARY_CTX__" #define LIBRARY_API_NAME "__LIBRARY_API__" #define GLOBALS_API_NAME "__GLOBALS_API__" -#define LOAD_TIMEOUT_MS 500 /* Lua engine ctx */ typedef struct luaEngineCtx { @@ -67,6 +45,7 @@ typedef struct luaFunctionCtx { typedef struct loadCtx { functionLibInfo *li; monotime start_time; + size_t timeout; } loadCtx; typedef struct registerFunctionArgs { @@ -85,7 +64,7 @@ static void luaEngineLoadHook(lua_State *lua, lua_Debug *ar) { loadCtx *load_ctx = luaGetFromRegistry(lua, REGISTRY_LOAD_CTX_NAME); serverAssert(load_ctx); /* Only supported inside script invocation */ uint64_t duration = elapsedMs(load_ctx->start_time); - if (duration > LOAD_TIMEOUT_MS) { + if (load_ctx->timeout > 0 && duration > load_ctx->timeout) { lua_sethook(lua, luaEngineLoadHook, LUA_MASKLINE, 0); luaPushError(lua,"FUNCTION LOAD timeout"); @@ -100,7 +79,7 @@ static void luaEngineLoadHook(lua_State *lua, lua_Debug *ar) { * * Return NULL on compilation error and set the error to the err variable */ -static int luaEngineCreate(void *engine_ctx, functionLibInfo *li, sds blob, sds *err) { +static int luaEngineCreate(void *engine_ctx, functionLibInfo *li, sds blob, size_t timeout, sds *err) { int ret = C_ERR; luaEngineCtx *lua_engine_ctx = engine_ctx; lua_State *lua = lua_engine_ctx->lua; @@ -124,6 +103,7 @@ static int luaEngineCreate(void *engine_ctx, functionLibInfo *li, sds blob, sds loadCtx load_ctx = { .li = li, .start_time = getMonotonicUs(), + .timeout = timeout, }; luaSaveOnRegistry(lua, REGISTRY_LOAD_CTX_NAME, &load_ctx); @@ -422,7 +402,7 @@ static int luaRegisterFunction(lua_State *lua) { /* Initialize Lua engine, should be called once on start. 
*/ int luaEngineInitEngine(void) { luaEngineCtx *lua_engine_ctx = zmalloc(sizeof(*lua_engine_ctx)); - lua_engine_ctx->lua = lua_open(); + lua_engine_ctx->lua = createLuaState(); luaRegisterRedisAPI(lua_engine_ctx->lua); diff --git a/src/functions.c b/src/functions.c index f5738ba79d6..427cda8d003 100644 --- a/src/functions.c +++ b/src/functions.c @@ -1,30 +1,9 @@ /* - * Copyright (c) 2021, Redis Ltd. + * Copyright (c) 2011-Present, Redis Ltd. * All rights reserved. * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Redis nor the names of its contributors may be used - * to endorse or promote products derived from this software without - * specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. 
+ * Licensed under your choice of the Redis Source Available License 2.0 + * (RSALv2) or the Server Side Public License v1 (SSPLv1). */ #include "functions.h" @@ -33,6 +12,8 @@ #include "adlist.h" #include "atomicvar.h" +#define LOAD_TIMEOUT_MS 500 + typedef enum { restorePolicy_Flush, restorePolicy_Append, restorePolicy_Replace } restorePolicy; @@ -116,10 +97,7 @@ dictType librariesDictType = { /* Dictionary of engines */ static dict *engines = NULL; -/* Libraries Ctx. - * Contains the dictionary that map a library name to library object, - * Contains the dictionary that map a function name to function object, - * and the cache memory used by all the functions */ +/* Libraries Ctx. */ static functionsLibCtx *curr_functions_lib_ctx = NULL; static size_t functionMallocSize(functionInfo *fi) { @@ -497,7 +475,6 @@ static void functionListReplyFlags(client *c, functionInfo *fi) { * Return general information about all the libraries: * * Library name * * The engine used to run the Library - * * Library description * * Functions list * * Library code (if WITHCODE is given) * @@ -679,7 +656,6 @@ void fcallroCommand(client *c) { * is saved separately with the following information: * * Library name * * Engine name - * * Library description * * Library code * RDB_OPCODE_FUNCTION2 is saved before each library to present * that the payload is a library. 
@@ -838,7 +814,6 @@ void functionHelpCommand(client *c) { " Return general information on all the libraries:", " * Library name", " * The engine used to run the Library", -" * Library description", " * Functions list", " * Library code (if WITHCODE is given)", " It also possible to get only function that matches a pattern using LIBRARYNAME argument.", @@ -892,9 +867,7 @@ static int functionsVerifyName(sds name) { int functionExtractLibMetaData(sds payload, functionsLibMataData *md, sds *err) { sds name = NULL; - sds desc = NULL; sds engine = NULL; - sds code = NULL; if (strncmp(payload, "#!", 2) != 0) { *err = sdsnew("Missing library metadata"); return C_ERR; @@ -946,9 +919,7 @@ int functionExtractLibMetaData(sds payload, functionsLibMataData *md, sds *err) error: if (name) sdsfree(name); - if (desc) sdsfree(desc); if (engine) sdsfree(engine); - if (code) sdsfree(code); sdsfreesplitres(parts, numparts); return C_ERR; } @@ -961,7 +932,7 @@ void functionFreeLibMetaData(functionsLibMataData *md) { /* Compile and save the given library, return the loaded library name on success * and NULL on failure. 
In case on failure the err out param is set with relevant error message */ -sds functionsCreateWithLibraryCtx(sds code, int replace, sds* err, functionsLibCtx *lib_ctx) { +sds functionsCreateWithLibraryCtx(sds code, int replace, sds* err, functionsLibCtx *lib_ctx, size_t timeout) { dictIterator *iter = NULL; dictEntry *entry = NULL; functionLibInfo *new_li = NULL; @@ -995,7 +966,7 @@ sds functionsCreateWithLibraryCtx(sds code, int replace, sds* err, functionsLibC } new_li = engineLibraryCreate(md.name, ei, code); - if (engine->create(engine->engine_ctx, new_li, md.code, err) != C_OK) { + if (engine->create(engine->engine_ctx, new_li, md.code, timeout, err) != C_OK) { goto error; } @@ -1063,7 +1034,11 @@ void functionLoadCommand(client *c) { robj *code = c->argv[argc_pos]; sds err = NULL; sds library_name = NULL; - if (!(library_name = functionsCreateWithLibraryCtx(code->ptr, replace, &err, curr_functions_lib_ctx))) + size_t timeout = LOAD_TIMEOUT_MS; + if (mustObeyClient(c)) { + timeout = 0; + } + if (!(library_name = functionsCreateWithLibraryCtx(code->ptr, replace, &err, curr_functions_lib_ctx, timeout))) { addReplyErrorSds(c, err); return; @@ -1078,15 +1053,15 @@ void functionLoadCommand(client *c) { unsigned long functionsMemory(void) { dictIterator *iter = dictGetIterator(engines); dictEntry *entry = NULL; - size_t engines_nemory = 0; + size_t engines_memory = 0; while ((entry = dictNext(iter))) { engineInfo *ei = dictGetVal(entry); engine *engine = ei->engine; - engines_nemory += engine->get_used_memory(engine->engine_ctx); + engines_memory += engine->get_used_memory(engine->engine_ctx); } dictReleaseIterator(iter); - return engines_nemory; + return engines_memory; } /* Return memory overhead of all the engines combine */ @@ -1113,7 +1088,7 @@ dict* functionsLibGet(void) { return curr_functions_lib_ctx->libraries; } -size_t functionsLibCtxfunctionsLen(functionsLibCtx *functions_ctx) { +size_t functionsLibCtxFunctionsLen(functionsLibCtx *functions_ctx) { 
return dictSize(functions_ctx->functions); } diff --git a/src/functions.h b/src/functions.h index 26e45babc54..1d69e3794af 100644 --- a/src/functions.h +++ b/src/functions.h @@ -1,30 +1,9 @@ /* - * Copyright (c) 2021, Redis Ltd. + * Copyright (c) 2021-Present, Redis Ltd. * All rights reserved. * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Redis nor the names of its contributors may be used - * to endorse or promote products derived from this software without - * specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. + * Licensed under your choice of the Redis Source Available License 2.0 + * (RSALv2) or the Server Side Public License v1 (SSPLv1). 
*/ #ifndef __FUNCTIONS_H_ @@ -32,11 +11,16 @@ /* * functions.c unit provides the Redis Functions API: - * * FUNCTION CREATE - * * FUNCTION CALL + * * FUNCTION LOAD + * * FUNCTION LIST + * * FUNCTION CALL (FCALL and FCALL_RO) * * FUNCTION DELETE + * * FUNCTION STATS * * FUNCTION KILL - * * FUNCTION INFO + * * FUNCTION FLUSH + * * FUNCTION DUMP + * * FUNCTION RESTORE + * * FUNCTION HELP * * Also contains implementation for: * * Save/Load function from rdb @@ -53,9 +37,14 @@ typedef struct engine { /* engine specific context */ void *engine_ctx; - /* Create function callback, get the engine_ctx, and function code. - * returns NULL on error and set sds to be the error message */ - int (*create)(void *engine_ctx, functionLibInfo *li, sds code, sds *err); + /* Create function callback, get the engine_ctx, and function code + * engine_ctx - opaque struct that was created on engine initialization + * li - library information that need to be provided and when add functions + * code - the library code + * timeout - timeout for the library creation (0 for no timeout) + * err - description of error (if occurred) + * returns C_ERR on error and set err to be the error message */ + int (*create)(void *engine_ctx, functionLibInfo *li, sds code, size_t timeout, sds *err); /* Invoking a function, r_ctx is an opaque object (from engine POV). 
* The r_ctx should be used by the engine to interaction with Redis, @@ -109,13 +98,13 @@ struct functionLibInfo { }; int functionsRegisterEngine(const char *engine_name, engine *engine_ctx); -sds functionsCreateWithLibraryCtx(sds code, int replace, sds* err, functionsLibCtx *lib_ctx); +sds functionsCreateWithLibraryCtx(sds code, int replace, sds* err, functionsLibCtx *lib_ctx, size_t timeout); unsigned long functionsMemory(void); unsigned long functionsMemoryOverhead(void); unsigned long functionsNum(void); unsigned long functionsLibNum(void); dict* functionsLibGet(void); -size_t functionsLibCtxfunctionsLen(functionsLibCtx *functions_ctx); +size_t functionsLibCtxFunctionsLen(functionsLibCtx *functions_ctx); functionsLibCtx* functionsLibCtxGetCurrent(void); functionsLibCtx* functionsLibCtxCreate(void); void functionsLibCtxClearCurrent(int async); diff --git a/src/geo.c b/src/geo.c index ac25a20c6cb..90817998a19 100644 --- a/src/geo.c +++ b/src/geo.c @@ -1,6 +1,6 @@ /* * Copyright (c) 2014, Matt Stancliff . - * Copyright (c) 2015-2016, Salvatore Sanfilippo . + * Copyright (c) 2015-current, Redis Ltd. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -300,7 +300,7 @@ int geoGetPointsInRange(robj *zobj, double min, double max, GeoShape *shape, geo zskiplist *zsl = zs->zsl; zskiplistNode *ln; - if ((ln = zslFirstInRange(zsl, &range)) == NULL) { + if ((ln = zslNthInRange(zsl, &range, 0)) == NULL) { /* Nothing exists starting at our min. No results. */ return 0; } @@ -690,7 +690,7 @@ void georadiusGeneric(client *c, int srcKeyIndex, int flags) { } if (any && !count) { - addReplyErrorFormat(c, "the ANY argument requires COUNT argument"); + addReplyError(c, "the ANY argument requires COUNT argument"); return; } diff --git a/src/geohash.c b/src/geohash.c index 2cbcf287543..e9f0c654dd6 100644 --- a/src/geohash.c +++ b/src/geohash.c @@ -1,7 +1,7 @@ /* * Copyright (c) 2013-2014, yinqiwen * Copyright (c) 2014, Matt Stancliff . 
- * Copyright (c) 2015-2016, Salvatore Sanfilippo . + * Copyright (c) 2015-current, Redis Ltd. * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/src/geohash.h b/src/geohash.h index 4befb93039e..19fa5a1d0fd 100644 --- a/src/geohash.h +++ b/src/geohash.h @@ -1,7 +1,7 @@ /* * Copyright (c) 2013-2014, yinqiwen * Copyright (c) 2014, Matt Stancliff . - * Copyright (c) 2015, Salvatore Sanfilippo . + * Copyright (c) 2015-current, Redis Ltd. * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/src/geohash_helper.c b/src/geohash_helper.c index a3816fbe337..ba373268921 100644 --- a/src/geohash_helper.c +++ b/src/geohash_helper.c @@ -1,7 +1,7 @@ /* * Copyright (c) 2013-2014, yinqiwen * Copyright (c) 2014, Matt Stancliff . - * Copyright (c) 2015-2016, Salvatore Sanfilippo . + * Copyright (c) 2015-current, Redis Ltd. * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/src/geohash_helper.h b/src/geohash_helper.h index 56c731fadd6..262bd8e8da3 100644 --- a/src/geohash_helper.h +++ b/src/geohash_helper.h @@ -1,7 +1,7 @@ /* * Copyright (c) 2013-2014, yinqiwen * Copyright (c) 2014, Matt Stancliff . - * Copyright (c) 2015, Salvatore Sanfilippo . + * Copyright (c) 2015-current, Redis Ltd. * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/src/hyperloglog.c b/src/hyperloglog.c index 1a74f479377..cb0929f47e8 100644 --- a/src/hyperloglog.c +++ b/src/hyperloglog.c @@ -1,32 +1,11 @@ /* hyperloglog.c - Redis HyperLogLog probabilistic cardinality approximation. * This file implements the algorithm and the exported Redis commands. * - * Copyright (c) 2014, Salvatore Sanfilippo + * Copyright (c) 2014-Present, Redis Ltd. * All rights reserved. 
* - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Redis nor the names of its contributors may be used - * to endorse or promote products derived from this software without - * specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. + * Licensed under your choice of the Redis Source Available License 2.0 + * (RSALv2) or the Server Side Public License v1 (SSPLv1). */ #include "server.h" @@ -1220,10 +1199,10 @@ void pfaddCommand(client *c) { } hdr = o->ptr; if (updated) { + HLL_INVALIDATE_CACHE(hdr); signalModifiedKey(c,c->db,c->argv[1]); notifyKeyspaceEvent(NOTIFY_STRING,"pfadd",c->argv[1],c->db->id); server.dirty += updated; - HLL_INVALIDATE_CACHE(hdr); } addReply(c, updated ? 
shared.cone : shared.czero); } diff --git a/src/intset.c b/src/intset.c index 621a74283a2..5216251eb52 100644 --- a/src/intset.c +++ b/src/intset.c @@ -1,6 +1,6 @@ /* * Copyright (c) 2009-2012, Pieter Noordhuis - * Copyright (c) 2009-2012, Salvatore Sanfilippo + * Copyright (c) 2009-current, Redis Ltd. * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/src/intset.h b/src/intset.h index 41cc7b8222a..4259aaa7930 100644 --- a/src/intset.h +++ b/src/intset.h @@ -1,6 +1,6 @@ /* * Copyright (c) 2009-2012, Pieter Noordhuis - * Copyright (c) 2009-2012, Salvatore Sanfilippo + * Copyright (c) 2009-current, Redis Ltd. * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/src/kvstore.c b/src/kvstore.c new file mode 100644 index 00000000000..890b85c13c3 --- /dev/null +++ b/src/kvstore.c @@ -0,0 +1,1033 @@ +/* + * Index-based KV store implementation + * This file implements a KV store comprised of an array of dicts (see dict.c) + * The purpose of this KV store is to have easy access to all keys that belong + * in the same dict (i.e. are in the same dict-index) + * + * For example, when Redis is running in cluster mode, we use kvstore to save + * all keys that map to the same hash-slot in a separate dict within the kvstore + * struct. + * This enables us to easily access all keys that map to a specific hash-slot. + * + * Copyright (c) 2011-Present, Redis Ltd. and contributors. + * All rights reserved. + * + * Licensed under your choice of the Redis Source Available License 2.0 + * (RSALv2) or the Server Side Public License v1 (SSPLv1). 
+ */ +#include "fmacros.h" + +#include +#include + +#include "zmalloc.h" +#include "kvstore.h" +#include "redisassert.h" +#include "monotonic.h" + +#define UNUSED(V) ((void) V) + +struct _kvstore { + int flags; + dictType dtype; + dict **dicts; + long long num_dicts; + long long num_dicts_bits; + list *rehashing; /* List of dictionaries in this kvstore that are currently rehashing. */ + int resize_cursor; /* Cron job uses this cursor to gradually resize dictionaries (only used if num_dicts > 1). */ + int allocated_dicts; /* The number of allocated dicts. */ + int non_empty_dicts; /* The number of non-empty dicts. */ + unsigned long long key_count; /* Total number of keys in this kvstore. */ + unsigned long long bucket_count; /* Total number of buckets in this kvstore across dictionaries. */ + unsigned long long *dict_size_index; /* Binary indexed tree (BIT) that describes cumulative key frequencies up until given dict-index. */ + size_t overhead_hashtable_lut; /* The overhead of all dictionaries. */ + size_t overhead_hashtable_rehashing; /* The overhead of dictionaries rehashing. */ +}; + +/* Structure for kvstore iterator that allows iterating across multiple dicts. */ +struct _kvstoreIterator { + kvstore *kvs; + long long didx; + long long next_didx; + dictIterator di; +}; + +/* Structure for kvstore dict iterator that allows iterating the corresponding dict. */ +struct _kvstoreDictIterator { + kvstore *kvs; + long long didx; + dictIterator di; +}; + +/* Dict metadata for database, used for record the position in rehashing list. */ +typedef struct { + listNode *rehashing_node; /* list node in rehashing list */ +} kvstoreDictMetadata; + +/**********************************/ +/*** Helpers **********************/ +/**********************************/ + +/* Get the dictionary pointer based on dict-index. 
*/ +static dict *kvstoreGetDict(kvstore *kvs, int didx) { + return kvs->dicts[didx]; +} + +static dict **kvstoreGetDictRef(kvstore *kvs, int didx) { + return &kvs->dicts[didx]; +} + +static int kvstoreDictIsRehashingPaused(kvstore *kvs, int didx) +{ + dict *d = kvstoreGetDict(kvs, didx); + return d ? dictIsRehashingPaused(d) : 0; +} + +/* Returns total (cumulative) number of keys up until given dict-index (inclusive). + * Time complexity is O(log(kvs->num_dicts)). */ +static unsigned long long cumulativeKeyCountRead(kvstore *kvs, int didx) { + if (kvs->num_dicts == 1) { + assert(didx == 0); + return kvstoreSize(kvs); + } + int idx = didx + 1; + unsigned long long sum = 0; + while (idx > 0) { + sum += kvs->dict_size_index[idx]; + idx -= (idx & -idx); + } + return sum; +} + +static void addDictIndexToCursor(kvstore *kvs, int didx, unsigned long long *cursor) { + if (kvs->num_dicts == 1) + return; + /* didx can be -1 when iteration is over and there are no more dicts to visit. */ + if (didx < 0) + return; + *cursor = (*cursor << kvs->num_dicts_bits) | didx; +} + +static int getAndClearDictIndexFromCursor(kvstore *kvs, unsigned long long *cursor) { + if (kvs->num_dicts == 1) + return 0; + int didx = (int) (*cursor & (kvs->num_dicts-1)); + *cursor = *cursor >> kvs->num_dicts_bits; + return didx; +} + +/* Updates binary index tree (also known as Fenwick tree), increasing key count for a given dict. + * You can read more about this data structure here https://en.wikipedia.org/wiki/Fenwick_tree + * Time complexity is O(log(kvs->num_dicts)). */ +static void cumulativeKeyCountAdd(kvstore *kvs, int didx, long delta) { + kvs->key_count += delta; + + dict *d = kvstoreGetDict(kvs, didx); + size_t dsize = dictSize(d); + int non_empty_dicts_delta = dsize == 1? 1 : dsize == 0? -1 : 0; + kvs->non_empty_dicts += non_empty_dicts_delta; + + /* BIT does not need to be calculated when there's only one dict. 
*/ + if (kvs->num_dicts == 1) + return; + + /* Update the BIT */ + int idx = didx + 1; /* Unlike dict indices, BIT is 1-based, so we need to add 1. */ + while (idx <= kvs->num_dicts) { + if (delta < 0) { + assert(kvs->dict_size_index[idx] >= (unsigned long long)labs(delta)); + } + kvs->dict_size_index[idx] += delta; + idx += (idx & -idx); + } +} + +/* Create the dict if it does not exist and return it. */ +static dict *createDictIfNeeded(kvstore *kvs, int didx) { + dict *d = kvstoreGetDict(kvs, didx); + if (d) return d; + + kvs->dicts[didx] = dictCreate(&kvs->dtype); + kvs->allocated_dicts++; + return kvs->dicts[didx]; +} + +/* Called when the dict will delete entries, the function will check + * KVSTORE_FREE_EMPTY_DICTS to determine whether the empty dict needs + * to be freed. + * + * Note that for rehashing dicts, that is, in the case of safe iterators + * and Scan, we won't delete the dict. We will check whether it needs + * to be deleted when we're releasing the iterator. */ +static void freeDictIfNeeded(kvstore *kvs, int didx) { + if (!(kvs->flags & KVSTORE_FREE_EMPTY_DICTS) || + !kvstoreGetDict(kvs, didx) || + kvstoreDictSize(kvs, didx) != 0 || + kvstoreDictIsRehashingPaused(kvs, didx)) + return; + dictRelease(kvs->dicts[didx]); + kvs->dicts[didx] = NULL; + kvs->allocated_dicts--; +} + +/**********************************/ +/*** dict callbacks ***************/ +/**********************************/ + +/* Adds dictionary to the rehashing list, which allows us + * to quickly find rehash targets during incremental rehashing. + * + * If there are multiple dicts, updates the bucket count for the given dictionary + * in a DB, bucket count incremented with the new ht size during the rehashing phase. + * If there's one dict, bucket count can be retrieved directly from single dict bucket. 
*/ +static void kvstoreDictRehashingStarted(dict *d) { + kvstore *kvs = d->type->userdata; + kvstoreDictMetadata *metadata = (kvstoreDictMetadata *)dictMetadata(d); + listAddNodeTail(kvs->rehashing, d); + metadata->rehashing_node = listLast(kvs->rehashing); + + unsigned long long from, to; + dictRehashingInfo(d, &from, &to); + kvs->bucket_count += to; /* Started rehashing (Add the new ht size) */ + kvs->overhead_hashtable_lut += to; + kvs->overhead_hashtable_rehashing += from; +} + +/* Remove dictionary from the rehashing list. + * + * Updates the bucket count for the given dictionary in a DB. It removes + * the old ht size of the dictionary from the total sum of buckets for a DB. */ +static void kvstoreDictRehashingCompleted(dict *d) { + kvstore *kvs = d->type->userdata; + kvstoreDictMetadata *metadata = (kvstoreDictMetadata *)dictMetadata(d); + if (metadata->rehashing_node) { + listDelNode(kvs->rehashing, metadata->rehashing_node); + metadata->rehashing_node = NULL; + } + + unsigned long long from, to; + dictRehashingInfo(d, &from, &to); + kvs->bucket_count -= from; /* Finished rehashing (Remove the old ht size) */ + kvs->overhead_hashtable_lut -= from; + kvs->overhead_hashtable_rehashing -= from; +} + +/* Returns the size of the DB dict metadata in bytes. */ +static size_t kvstoreDictMetadataSize(dict *d) { + UNUSED(d); + return sizeof(kvstoreDictMetadata); +} + +/**********************************/ +/*** API **************************/ +/**********************************/ + +/* Create an array of dictionaries + * num_dicts_bits is the log2 of the amount of dictionaries needed (e.g. 0 for 1 dict, + * 3 for 8 dicts, etc.) 
*/ +kvstore *kvstoreCreate(dictType *type, int num_dicts_bits, int flags) { + /* We can't support more than 2^16 dicts because we want to save 48 bits + * for the dict cursor, see kvstoreScan */ + assert(num_dicts_bits <= 16); + + kvstore *kvs = zcalloc(sizeof(*kvs)); + memcpy(&kvs->dtype, type, sizeof(kvs->dtype)); + kvs->flags = flags; + + /* kvstore must be the one to set these callbacks, so we make sure the + * caller didn't do it */ + assert(!type->userdata); + assert(!type->dictMetadataBytes); + assert(!type->rehashingStarted); + assert(!type->rehashingCompleted); + kvs->dtype.userdata = kvs; + kvs->dtype.dictMetadataBytes = kvstoreDictMetadataSize; + kvs->dtype.rehashingStarted = kvstoreDictRehashingStarted; + kvs->dtype.rehashingCompleted = kvstoreDictRehashingCompleted; + + kvs->num_dicts_bits = num_dicts_bits; + kvs->num_dicts = 1 << kvs->num_dicts_bits; + kvs->dicts = zcalloc(sizeof(dict*) * kvs->num_dicts); + if (!(kvs->flags & KVSTORE_ALLOCATE_DICTS_ON_DEMAND)) { + for (int i = 0; i < kvs->num_dicts; i++) + createDictIfNeeded(kvs, i); + } + + kvs->rehashing = listCreate(); + kvs->key_count = 0; + kvs->non_empty_dicts = 0; + kvs->resize_cursor = 0; + kvs->dict_size_index = kvs->num_dicts > 1? 
zcalloc(sizeof(unsigned long long) * (kvs->num_dicts + 1)) : NULL; + kvs->bucket_count = 0; + kvs->overhead_hashtable_lut = 0; + kvs->overhead_hashtable_rehashing = 0; + + return kvs; +} + +void kvstoreEmpty(kvstore *kvs, void(callback)(dict*)) { + for (int didx = 0; didx < kvs->num_dicts; didx++) { + dict *d = kvstoreGetDict(kvs, didx); + if (!d) + continue; + kvstoreDictMetadata *metadata = (kvstoreDictMetadata *)dictMetadata(d); + if (metadata->rehashing_node) + metadata->rehashing_node = NULL; + dictEmpty(d, callback); + freeDictIfNeeded(kvs, didx); + } + + listEmpty(kvs->rehashing); + + kvs->key_count = 0; + kvs->non_empty_dicts = 0; + kvs->resize_cursor = 0; + kvs->bucket_count = 0; + if (kvs->dict_size_index) + memset(kvs->dict_size_index, 0, sizeof(unsigned long long) * (kvs->num_dicts + 1)); + kvs->overhead_hashtable_lut = 0; + kvs->overhead_hashtable_rehashing = 0; +} + +void kvstoreRelease(kvstore *kvs) { + for (int didx = 0; didx < kvs->num_dicts; didx++) { + dict *d = kvstoreGetDict(kvs, didx); + if (!d) + continue; + kvstoreDictMetadata *metadata = (kvstoreDictMetadata *)dictMetadata(d); + if (metadata->rehashing_node) + metadata->rehashing_node = NULL; + dictRelease(d); + } + zfree(kvs->dicts); + + listRelease(kvs->rehashing); + if (kvs->dict_size_index) + zfree(kvs->dict_size_index); + + zfree(kvs); +} + +unsigned long long int kvstoreSize(kvstore *kvs) { + if (kvs->num_dicts != 1) { + return kvs->key_count; + } else { + return kvs->dicts[0]? dictSize(kvs->dicts[0]) : 0; + } +} + +/* This method provides the cumulative sum of all the dictionary buckets + * across dictionaries in a database. */ +unsigned long kvstoreBuckets(kvstore *kvs) { + if (kvs->num_dicts != 1) { + return kvs->bucket_count; + } else { + return kvs->dicts[0]? 
dictBuckets(kvs->dicts[0]) : 0; + } +} + +size_t kvstoreMemUsage(kvstore *kvs) { + size_t mem = sizeof(*kvs); + + unsigned long long keys_count = kvstoreSize(kvs); + mem += keys_count * dictEntryMemUsage() + + kvstoreBuckets(kvs) * sizeof(dictEntry*) + + kvs->allocated_dicts * (sizeof(dict) + kvstoreDictMetadataSize(NULL)); + + /* Values are dict* shared with kvs->dicts */ + mem += listLength(kvs->rehashing) * sizeof(listNode); + + if (kvs->dict_size_index) + mem += sizeof(unsigned long long) * (kvs->num_dicts + 1); + + return mem; +} + +/* + * This method is used to iterate over the elements of the entire kvstore specifically across dicts. + * It's a three pronged approach. + * + * 1. It uses the provided cursor `cursor` to retrieve the dict index from it. + * 2. If the dictionary is in a valid state checked through the provided callback `dictScanValidFunction`, + * it performs a dictScan over the appropriate `keyType` dictionary of `db`. + * 3. If the dict is entirely scanned i.e. the cursor has reached 0, the next non empty dict is discovered. + * The dict information is embedded into the cursor and returned. + * + * To restrict the scan to a single dict, pass a valid dict index as + * 'onlydidx', otherwise pass -1. + */ +unsigned long long kvstoreScan(kvstore *kvs, unsigned long long cursor, + int onlydidx, dictScanFunction *scan_cb, + kvstoreScanShouldSkipDict *skip_cb, + void *privdata) +{ + unsigned long long _cursor = 0; + /* During dictionary traversal, 48 upper bits in the cursor are used for positioning in the HT. + * Following lower bits are used for the dict index number, ranging from 0 to 2^num_dicts_bits-1. + * Dict index is always 0 at the start of iteration and can be incremented only if there are + * multiple dicts. */ + int didx = getAndClearDictIndexFromCursor(kvs, &cursor); + if (onlydidx >= 0) { + if (didx < onlydidx) { + /* Fast-forward to onlydidx. 
*/ + assert(onlydidx < kvs->num_dicts); + didx = onlydidx; + cursor = 0; + } else if (didx > onlydidx) { + /* The cursor is already past onlydidx. */ + return 0; + } + } + + dict *d = kvstoreGetDict(kvs, didx); + + int skip = !d || (skip_cb && skip_cb(d)); + if (!skip) { + _cursor = dictScan(d, cursor, scan_cb, privdata); + /* In dictScan, scan_cb may delete entries (e.g., in active expire case). */ + freeDictIfNeeded(kvs, didx); + } + /* scanning done for the current dictionary or if the scanning wasn't possible, move to the next dict index. */ + if (_cursor == 0 || skip) { + if (onlydidx >= 0) + return 0; + didx = kvstoreGetNextNonEmptyDictIndex(kvs, didx); + } + if (didx == -1) { + return 0; + } + addDictIndexToCursor(kvs, didx, &_cursor); + return _cursor; +} + +/* + * This functions increases size of kvstore to match desired number. + * It resizes all individual dictionaries, unless skip_cb indicates otherwise. + * + * Based on the parameter `try_expand`, appropriate dict expand API is invoked. + * if try_expand is set to 1, `dictTryExpand` is used else `dictExpand`. + * The return code is either `DICT_OK`/`DICT_ERR` for both the API(s). + * `DICT_OK` response is for successful expansion. However, `DICT_ERR` response signifies failure in allocation in + * `dictTryExpand` call and in case of `dictExpand` call it signifies no expansion was performed. + */ +int kvstoreExpand(kvstore *kvs, uint64_t newsize, int try_expand, kvstoreExpandShouldSkipDictIndex *skip_cb) { + for (int i = 0; i < kvs->num_dicts; i++) { + dict *d = kvstoreGetDict(kvs, i); + if (!d || (skip_cb && skip_cb(i))) + continue; + int result = try_expand ? dictTryExpand(d, newsize) : dictExpand(d, newsize); + if (try_expand && result == DICT_ERR) + return 0; + } + + return 1; +} + +/* Returns fair random dict index, probability of each dict being returned is proportional to the number of elements that dictionary holds. 
+ * This function guarantees that it returns a dict-index of a non-empty dict, unless the entire kvstore is empty. + * Time complexity of this function is O(log(kvs->num_dicts)). */ +int kvstoreGetFairRandomDictIndex(kvstore *kvs) { + unsigned long target = kvstoreSize(kvs) ? (randomULong() % kvstoreSize(kvs)) + 1 : 0; + return kvstoreFindDictIndexByKeyIndex(kvs, target); +} + +void kvstoreGetStats(kvstore *kvs, char *buf, size_t bufsize, int full) { + buf[0] = '\0'; + + size_t l; + char *orig_buf = buf; + size_t orig_bufsize = bufsize; + dictStats *mainHtStats = NULL; + dictStats *rehashHtStats = NULL; + dict *d; + kvstoreIterator *kvs_it = kvstoreIteratorInit(kvs); + while ((d = kvstoreIteratorNextDict(kvs_it))) { + dictStats *stats = dictGetStatsHt(d, 0, full); + if (!mainHtStats) { + mainHtStats = stats; + } else { + dictCombineStats(stats, mainHtStats); + dictFreeStats(stats); + } + if (dictIsRehashing(d)) { + stats = dictGetStatsHt(d, 1, full); + if (!rehashHtStats) { + rehashHtStats = stats; + } else { + dictCombineStats(stats, rehashHtStats); + dictFreeStats(stats); + } + } + } + kvstoreIteratorRelease(kvs_it); + + if (mainHtStats && bufsize > 0) { + l = dictGetStatsMsg(buf, bufsize, mainHtStats, full); + dictFreeStats(mainHtStats); + buf += l; + bufsize -= l; + } + + if (rehashHtStats && bufsize > 0) { + l = dictGetStatsMsg(buf, bufsize, rehashHtStats, full); + dictFreeStats(rehashHtStats); + buf += l; + bufsize -= l; + } + /* Make sure there is a NULL term at the end. */ + if (orig_bufsize) orig_buf[orig_bufsize - 1] = '\0'; +} + +/* Finds a dict containing target element in a key space ordered by dict index. + * Consider this example. Dictionaries are represented by brackets and keys by dots: + * #0 #1 #2 #3 #4 + * [..][....][...][.......][.] + * ^ + * target + * + * In this case dict #3 contains key that we are trying to find. + * + * The return value is 0 based dict-index, and the range of the target is [1..kvstoreSize], kvstoreSize inclusive. 
+ * + * To find the dict, we start with the root node of the binary index tree and search through its children + * from the highest index (2^num_dicts_bits in our case) to the lowest index. At each node, we check if the target + * value is greater than the node's value. If it is, we remove the node's value from the target and recursively + * search for the new target using the current node as the parent. + * Time complexity of this function is O(log(kvs->num_dicts)) + */ +int kvstoreFindDictIndexByKeyIndex(kvstore *kvs, unsigned long target) { + if (kvs->num_dicts == 1 || kvstoreSize(kvs) == 0) + return 0; + assert(target <= kvstoreSize(kvs)); + + int result = 0, bit_mask = 1 << kvs->num_dicts_bits; + for (int i = bit_mask; i != 0; i >>= 1) { + int current = result + i; + /* When the target index is greater than 'current' node value the we will update + * the target and search in the 'current' node tree. */ + if (target > kvs->dict_size_index[current]) { + target -= kvs->dict_size_index[current]; + result = current; + } + } + /* Adjust the result to get the correct dict: + * 1. result += 1; + * After the calculations, the index of target in dict_size_index should be the next one, + * so we should add 1. + * 2. result -= 1; + * Unlike BIT(dict_size_index is 1-based), dict indices are 0-based, so we need to subtract 1. + * As the addition and subtraction cancel each other out, we can simply return the result. */ + return result; +} + +/* Wrapper for kvstoreFindDictIndexByKeyIndex to get the first non-empty dict index in the kvstore. */ +int kvstoreGetFirstNonEmptyDictIndex(kvstore *kvs) { + return kvstoreFindDictIndexByKeyIndex(kvs, 1); +} + +/* Returns next non-empty dict index strictly after given one, or -1 if provided didx is the last one. 
*/ +int kvstoreGetNextNonEmptyDictIndex(kvstore *kvs, int didx) { + if (kvs->num_dicts == 1) { + assert(didx == 0); + return -1; + } + unsigned long long next_key = cumulativeKeyCountRead(kvs, didx) + 1; + return next_key <= kvstoreSize(kvs) ? kvstoreFindDictIndexByKeyIndex(kvs, next_key) : -1; +} + +int kvstoreNumNonEmptyDicts(kvstore *kvs) { + return kvs->non_empty_dicts; +} + +int kvstoreNumAllocatedDicts(kvstore *kvs) { + return kvs->allocated_dicts; +} + +int kvstoreNumDicts(kvstore *kvs) { + return kvs->num_dicts; +} + +/* Returns kvstore iterator that can be used to iterate through sub-dictionaries. + * + * The caller should free the resulting kvs_it with kvstoreIteratorRelease. */ +kvstoreIterator *kvstoreIteratorInit(kvstore *kvs) { + kvstoreIterator *kvs_it = zmalloc(sizeof(*kvs_it)); + kvs_it->kvs = kvs; + kvs_it->didx = -1; + kvs_it->next_didx = kvstoreGetFirstNonEmptyDictIndex(kvs_it->kvs); /* Finds first non-empty dict index. */ + dictInitSafeIterator(&kvs_it->di, NULL); + return kvs_it; +} + +/* Free the kvs_it returned by kvstoreIteratorInit. */ +void kvstoreIteratorRelease(kvstoreIterator *kvs_it) { + dictIterator *iter = &kvs_it->di; + dictResetIterator(iter); + /* In the safe iterator context, we may delete entries. */ + freeDictIfNeeded(kvs_it->kvs, kvs_it->didx); + zfree(kvs_it); +} + + +/* Returns next dictionary from the iterator, or NULL if iteration is complete. + * + * - Takes care to reset the iter of the previous dict before moved to the next dict. + */ +dict *kvstoreIteratorNextDict(kvstoreIterator *kvs_it) { + if (kvs_it->next_didx == -1) + return NULL; + + /* The dict may be deleted during the iteration process, so here need to check for NULL. */ + if (kvs_it->didx != -1 && kvstoreGetDict(kvs_it->kvs, kvs_it->didx)) { + /* Before we move to the next dict, reset the iter of the previous dict. */ + dictIterator *iter = &kvs_it->di; + dictResetIterator(iter); + /* In the safe iterator context, we may delete entries. 
*/ + freeDictIfNeeded(kvs_it->kvs, kvs_it->didx); + } + + kvs_it->didx = kvs_it->next_didx; + kvs_it->next_didx = kvstoreGetNextNonEmptyDictIndex(kvs_it->kvs, kvs_it->didx); + return kvs_it->kvs->dicts[kvs_it->didx]; +} + +int kvstoreIteratorGetCurrentDictIndex(kvstoreIterator *kvs_it) { + assert(kvs_it->didx >= 0 && kvs_it->didx < kvs_it->kvs->num_dicts); + return kvs_it->didx; +} + +/* Returns next entry. */ +dictEntry *kvstoreIteratorNext(kvstoreIterator *kvs_it) { + dictEntry *de = kvs_it->di.d ? dictNext(&kvs_it->di) : NULL; + if (!de) { /* No current dict or reached the end of the dictionary. */ + + /* Before we move to the next dict, function kvstoreIteratorNextDict() + * reset the iter of the previous dict & freeDictIfNeeded(). */ + dict *d = kvstoreIteratorNextDict(kvs_it); + + if (!d) + return NULL; + + dictInitSafeIterator(&kvs_it->di, d); + de = dictNext(&kvs_it->di); + } + return de; +} + +/* This method traverses through kvstore dictionaries and triggers a resize. + * It first tries to shrink if needed, and if it isn't, it tries to expand. */ +void kvstoreTryResizeDicts(kvstore *kvs, int limit) { + if (limit > kvs->num_dicts) + limit = kvs->num_dicts; + + for (int i = 0; i < limit; i++) { + int didx = kvs->resize_cursor; + dict *d = kvstoreGetDict(kvs, didx); + if (d && dictShrinkIfNeeded(d) == DICT_ERR) { + dictExpandIfNeeded(d); + } + kvs->resize_cursor = (didx + 1) % kvs->num_dicts; + } +} + +/* Our hash table implementation performs rehashing incrementally while + * we write/read from the hash table. Still if the server is idle, the hash + * table will use two tables for a long time. So we try to use threshold_us + * of CPU time at every call of this function to perform some rehashing. + * + * The function returns the amount of microsecs spent if some rehashing was + * performed, otherwise 0 is returned. 
*/ +uint64_t kvstoreIncrementallyRehash(kvstore *kvs, uint64_t threshold_us) { + if (listLength(kvs->rehashing) == 0) + return 0; + + /* Our goal is to rehash as many dictionaries as we can before reaching threshold_us, + * after each dictionary completes rehashing, it removes itself from the list. */ + listNode *node; + monotime timer; + uint64_t elapsed_us = 0; + elapsedStart(&timer); + while ((node = listFirst(kvs->rehashing))) { + dictRehashMicroseconds(listNodeValue(node), threshold_us - elapsed_us); + + elapsed_us = elapsedUs(timer); + if (elapsed_us >= threshold_us) { + break; /* Reached the time limit. */ + } + } + return elapsed_us; +} + +size_t kvstoreOverheadHashtableLut(kvstore *kvs) { + return kvs->overhead_hashtable_lut * sizeof(dictEntry *); +} + +size_t kvstoreOverheadHashtableRehashing(kvstore *kvs) { + return kvs->overhead_hashtable_rehashing * sizeof(dictEntry *); +} + +unsigned long kvstoreDictRehashingCount(kvstore *kvs) { + return listLength(kvs->rehashing); +} + +unsigned long kvstoreDictSize(kvstore *kvs, int didx) +{ + dict *d = kvstoreGetDict(kvs, didx); + if (!d) + return 0; + return dictSize(d); +} + +kvstoreDictIterator *kvstoreGetDictIterator(kvstore *kvs, int didx) +{ + kvstoreDictIterator *kvs_di = zmalloc(sizeof(*kvs_di)); + kvs_di->kvs = kvs; + kvs_di->didx = didx; + dictInitIterator(&kvs_di->di, kvstoreGetDict(kvs, didx)); + return kvs_di; +} + +kvstoreDictIterator *kvstoreGetDictSafeIterator(kvstore *kvs, int didx) +{ + kvstoreDictIterator *kvs_di = zmalloc(sizeof(*kvs_di)); + kvs_di->kvs = kvs; + kvs_di->didx = didx; + dictInitSafeIterator(&kvs_di->di, kvstoreGetDict(kvs, didx)); + return kvs_di; +} + +/* Free the kvs_di returned by kvstoreGetDictIterator and kvstoreGetDictSafeIterator. */ +void kvstoreReleaseDictIterator(kvstoreDictIterator *kvs_di) +{ + /* The dict may be deleted during the iteration process, so here need to check for NULL. 
*/ + if (kvstoreGetDict(kvs_di->kvs, kvs_di->didx)) { + dictResetIterator(&kvs_di->di); + /* In the safe iterator context, we may delete entries. */ + freeDictIfNeeded(kvs_di->kvs, kvs_di->didx); + } + + zfree(kvs_di); +} + +/* Get the next element of the dict through kvstoreDictIterator and dictNext. */ +dictEntry *kvstoreDictIteratorNext(kvstoreDictIterator *kvs_di) +{ + /* The dict may be deleted during the iteration process, so here need to check for NULL. */ + dict *d = kvstoreGetDict(kvs_di->kvs, kvs_di->didx); + if (!d) return NULL; + + return dictNext(&kvs_di->di); +} + +dictEntry *kvstoreDictGetRandomKey(kvstore *kvs, int didx) +{ + dict *d = kvstoreGetDict(kvs, didx); + if (!d) + return NULL; + return dictGetRandomKey(d); +} + +dictEntry *kvstoreDictGetFairRandomKey(kvstore *kvs, int didx) +{ + dict *d = kvstoreGetDict(kvs, didx); + if (!d) + return NULL; + return dictGetFairRandomKey(d); +} + +dictEntry *kvstoreDictFindEntryByPtrAndHash(kvstore *kvs, int didx, const void *oldptr, uint64_t hash) +{ + dict *d = kvstoreGetDict(kvs, didx); + if (!d) + return NULL; + return dictFindEntryByPtrAndHash(d, oldptr, hash); +} + +unsigned int kvstoreDictGetSomeKeys(kvstore *kvs, int didx, dictEntry **des, unsigned int count) +{ + dict *d = kvstoreGetDict(kvs, didx); + if (!d) + return 0; + return dictGetSomeKeys(d, des, count); +} + +int kvstoreDictExpand(kvstore *kvs, int didx, unsigned long size) +{ + dict *d = kvstoreGetDict(kvs, didx); + if (!d) + return DICT_ERR; + return dictExpand(d, size); +} + +unsigned long kvstoreDictScanDefrag(kvstore *kvs, int didx, unsigned long v, dictScanFunction *fn, dictDefragFunctions *defragfns, void *privdata) +{ + dict *d = kvstoreGetDict(kvs, didx); + if (!d) + return 0; + return dictScanDefrag(d, v, fn, defragfns, privdata); +} + +/* Unlike kvstoreDictScanDefrag(), this method doesn't defrag the data(keys and values) + * within dict, it only reallocates the memory used by the dict structure itself using + * the provided 
allocation function. This feature was added for the active defrag feature. + * + * The 'defragfn' callback is called with a reference to the dict + * that callback can reallocate. */ +void kvstoreDictLUTDefrag(kvstore *kvs, kvstoreDictLUTDefragFunction *defragfn) { + for (int didx = 0; didx < kvs->num_dicts; didx++) { + dict **d = kvstoreGetDictRef(kvs, didx), *newd; + if (!*d) + continue; + if ((newd = defragfn(*d))) { + *d = newd; + + /* After defragmenting the dict, update its corresponding + * rehashing node in the kvstore's rehashing list. */ + kvstoreDictMetadata *metadata = (kvstoreDictMetadata *)dictMetadata(*d); + if (metadata->rehashing_node) + metadata->rehashing_node->value = *d; + } + } +} + +uint64_t kvstoreGetHash(kvstore *kvs, const void *key) +{ + return kvs->dtype.hashFunction(key); +} + +void *kvstoreDictFetchValue(kvstore *kvs, int didx, const void *key) +{ + dict *d = kvstoreGetDict(kvs, didx); + if (!d) + return NULL; + return dictFetchValue(d, key); +} + +dictEntry *kvstoreDictFind(kvstore *kvs, int didx, void *key) { + dict *d = kvstoreGetDict(kvs, didx); + if (!d) + return NULL; + return dictFind(d, key); +} + +dictEntry *kvstoreDictAddRaw(kvstore *kvs, int didx, void *key, dictEntry **existing) { + dict *d = createDictIfNeeded(kvs, didx); + dictEntry *ret = dictAddRaw(d, key, existing); + if (ret) + cumulativeKeyCountAdd(kvs, didx, 1); + return ret; +} + +void kvstoreDictSetKey(kvstore *kvs, int didx, dictEntry* de, void *key) { + dict *d = kvstoreGetDict(kvs, didx); + dictSetKey(d, de, key); +} + +void kvstoreDictSetVal(kvstore *kvs, int didx, dictEntry *de, void *val) { + dict *d = kvstoreGetDict(kvs, didx); + dictSetVal(d, de, val); +} + +dictEntry *kvstoreDictTwoPhaseUnlinkFind(kvstore *kvs, int didx, const void *key, dictEntry ***plink, int *table_index) { + dict *d = kvstoreGetDict(kvs, didx); + if (!d) + return NULL; + return dictTwoPhaseUnlinkFind(kvstoreGetDict(kvs, didx), key, plink, table_index); +} + +void 
kvstoreDictTwoPhaseUnlinkFree(kvstore *kvs, int didx, dictEntry *he, dictEntry **plink, int table_index) { + dict *d = kvstoreGetDict(kvs, didx); + dictTwoPhaseUnlinkFree(d, he, plink, table_index); + cumulativeKeyCountAdd(kvs, didx, -1); + freeDictIfNeeded(kvs, didx); +} + +int kvstoreDictDelete(kvstore *kvs, int didx, const void *key) { + dict *d = kvstoreGetDict(kvs, didx); + if (!d) + return DICT_ERR; + int ret = dictDelete(d, key); + if (ret == DICT_OK) { + cumulativeKeyCountAdd(kvs, didx, -1); + freeDictIfNeeded(kvs, didx); + } + return ret; +} + +#ifdef REDIS_TEST +#include +#include "testhelp.h" + +#define TEST(name) printf("test — %s\n", name); + +uint64_t hashTestCallback(const void *key) { + return dictGenHashFunction((unsigned char*)key, strlen((char*)key)); +} + +void freeTestCallback(dict *d, void *val) { + UNUSED(d); + zfree(val); +} + +void *defragAllocTest(void *ptr) { + size_t size = zmalloc_usable_size(ptr); + void *newptr = zmalloc(size); + memcpy(newptr, ptr, size); + zfree(ptr); + return newptr; +} + +dict *defragLUTTestCallback(dict *d) { + /* handle the dict struct */ + d = defragAllocTest(d); + /* handle the first hash table */ + d->ht_table[0] = defragAllocTest(d->ht_table[0]); + /* handle the second hash table */ + if (d->ht_table[1]) + d->ht_table[1] = defragAllocTest(d->ht_table[1]); + return d; +} + +dictType KvstoreDictTestType = { + hashTestCallback, + NULL, + NULL, + NULL, + freeTestCallback, + NULL, + NULL +}; + +char *stringFromInt(int value) { + char buf[32]; + int len; + char *s; + + len = snprintf(buf, sizeof(buf), "%d",value); + s = zmalloc(len+1); + memcpy(s, buf, len); + s[len] = '\0'; + return s; +} + +/* ./redis-server test kvstore */ +int kvstoreTest(int argc, char **argv, int flags) { + UNUSED(argc); + UNUSED(argv); + UNUSED(flags); + + int i; + void *key; + dictEntry *de; + kvstoreIterator *kvs_it; + kvstoreDictIterator *kvs_di; + + int didx = 0; + int curr_slot = 0; + kvstore *kvs1 = kvstoreCreate(&KvstoreDictTestType, 
0, KVSTORE_ALLOCATE_DICTS_ON_DEMAND); + kvstore *kvs2 = kvstoreCreate(&KvstoreDictTestType, 0, KVSTORE_ALLOCATE_DICTS_ON_DEMAND | KVSTORE_FREE_EMPTY_DICTS); + + TEST("Add 16 keys") { + for (i = 0; i < 16; i++) { + de = kvstoreDictAddRaw(kvs1, didx, stringFromInt(i), NULL); + assert(de != NULL); + de = kvstoreDictAddRaw(kvs2, didx, stringFromInt(i), NULL); + assert(de != NULL); + } + assert(kvstoreDictSize(kvs1, didx) == 16); + assert(kvstoreSize(kvs1) == 16); + assert(kvstoreDictSize(kvs2, didx) == 16); + assert(kvstoreSize(kvs2) == 16); + } + + TEST("kvstoreIterator case 1: removing all keys does not delete the empty dict") { + kvs_it = kvstoreIteratorInit(kvs1); + while((de = kvstoreIteratorNext(kvs_it)) != NULL) { + curr_slot = kvstoreIteratorGetCurrentDictIndex(kvs_it); + key = dictGetKey(de); + assert(kvstoreDictDelete(kvs1, curr_slot, key) == DICT_OK); + } + kvstoreIteratorRelease(kvs_it); + + dict *d = kvstoreGetDict(kvs1, didx); + assert(d != NULL); + assert(kvstoreDictSize(kvs1, didx) == 0); + assert(kvstoreSize(kvs1) == 0); + } + + TEST("kvstoreIterator case 2: removing all keys will delete the empty dict") { + kvs_it = kvstoreIteratorInit(kvs2); + while((de = kvstoreIteratorNext(kvs_it)) != NULL) { + curr_slot = kvstoreIteratorGetCurrentDictIndex(kvs_it); + key = dictGetKey(de); + assert(kvstoreDictDelete(kvs2, curr_slot, key) == DICT_OK); + } + kvstoreIteratorRelease(kvs_it); + + /* Make sure the dict was removed from the rehashing list. 
*/ + while (kvstoreIncrementallyRehash(kvs2, 1000)) {} + + dict *d = kvstoreGetDict(kvs2, didx); + assert(d == NULL); + assert(kvstoreDictSize(kvs2, didx) == 0); + assert(kvstoreSize(kvs2) == 0); + } + + TEST("Add 16 keys again") { + for (i = 0; i < 16; i++) { + de = kvstoreDictAddRaw(kvs1, didx, stringFromInt(i), NULL); + assert(de != NULL); + de = kvstoreDictAddRaw(kvs2, didx, stringFromInt(i), NULL); + assert(de != NULL); + } + assert(kvstoreDictSize(kvs1, didx) == 16); + assert(kvstoreSize(kvs1) == 16); + assert(kvstoreDictSize(kvs2, didx) == 16); + assert(kvstoreSize(kvs2) == 16); + } + + TEST("kvstoreDictIterator case 1: removing all keys does not delete the empty dict") { + kvs_di = kvstoreGetDictSafeIterator(kvs1, didx); + while((de = kvstoreDictIteratorNext(kvs_di)) != NULL) { + key = dictGetKey(de); + assert(kvstoreDictDelete(kvs1, didx, key) == DICT_OK); + } + kvstoreReleaseDictIterator(kvs_di); + + dict *d = kvstoreGetDict(kvs1, didx); + assert(d != NULL); + assert(kvstoreDictSize(kvs1, didx) == 0); + assert(kvstoreSize(kvs1) == 0); + } + + TEST("kvstoreDictIterator case 2: removing all keys will delete the empty dict") { + kvs_di = kvstoreGetDictSafeIterator(kvs2, didx); + while((de = kvstoreDictIteratorNext(kvs_di)) != NULL) { + key = dictGetKey(de); + assert(kvstoreDictDelete(kvs2, didx, key) == DICT_OK); + } + kvstoreReleaseDictIterator(kvs_di); + + dict *d = kvstoreGetDict(kvs2, didx); + assert(d == NULL); + assert(kvstoreDictSize(kvs2, didx) == 0); + assert(kvstoreSize(kvs2) == 0); + } + + TEST("Verify that a rehashing dict's node in the rehashing list is correctly updated after defragmentation") { + kvstore *kvs = kvstoreCreate(&KvstoreDictTestType, 0, KVSTORE_ALLOCATE_DICTS_ON_DEMAND); + for (i = 0; i < 256; i++) { + de = kvstoreDictAddRaw(kvs, 0, stringFromInt(i), NULL); + if (listLength(kvs->rehashing)) break; + } + assert(listLength(kvs->rehashing)); + kvstoreDictLUTDefrag(kvs, defragLUTTestCallback); + while (kvstoreIncrementallyRehash(kvs, 
1000)) {} + kvstoreRelease(kvs); + } + + kvstoreRelease(kvs1); + kvstoreRelease(kvs2); + return 0; +} +#endif diff --git a/src/kvstore.h b/src/kvstore.h new file mode 100644 index 00000000000..bce45fe4c1b --- /dev/null +++ b/src/kvstore.h @@ -0,0 +1,79 @@ +#ifndef DICTARRAY_H_ +#define DICTARRAY_H_ + +#include "dict.h" +#include "adlist.h" + +typedef struct _kvstore kvstore; +typedef struct _kvstoreIterator kvstoreIterator; +typedef struct _kvstoreDictIterator kvstoreDictIterator; + +typedef int (kvstoreScanShouldSkipDict)(dict *d); +typedef int (kvstoreExpandShouldSkipDictIndex)(int didx); + +#define KVSTORE_ALLOCATE_DICTS_ON_DEMAND (1<<0) +#define KVSTORE_FREE_EMPTY_DICTS (1<<1) +kvstore *kvstoreCreate(dictType *type, int num_dicts_bits, int flags); +void kvstoreEmpty(kvstore *kvs, void(callback)(dict*)); +void kvstoreRelease(kvstore *kvs); +unsigned long long kvstoreSize(kvstore *kvs); +unsigned long kvstoreBuckets(kvstore *kvs); +size_t kvstoreMemUsage(kvstore *kvs); +unsigned long long kvstoreScan(kvstore *kvs, unsigned long long cursor, + int onlydidx, dictScanFunction *scan_cb, + kvstoreScanShouldSkipDict *skip_cb, + void *privdata); +int kvstoreExpand(kvstore *kvs, uint64_t newsize, int try_expand, kvstoreExpandShouldSkipDictIndex *skip_cb); +int kvstoreGetFairRandomDictIndex(kvstore *kvs); +void kvstoreGetStats(kvstore *kvs, char *buf, size_t bufsize, int full); + +int kvstoreFindDictIndexByKeyIndex(kvstore *kvs, unsigned long target); +int kvstoreGetFirstNonEmptyDictIndex(kvstore *kvs); +int kvstoreGetNextNonEmptyDictIndex(kvstore *kvs, int didx); +int kvstoreNumNonEmptyDicts(kvstore *kvs); +int kvstoreNumAllocatedDicts(kvstore *kvs); +int kvstoreNumDicts(kvstore *kvs); +uint64_t kvstoreGetHash(kvstore *kvs, const void *key); + +/* kvstore iterator specific functions */ +kvstoreIterator *kvstoreIteratorInit(kvstore *kvs); +void kvstoreIteratorRelease(kvstoreIterator *kvs_it); +dict *kvstoreIteratorNextDict(kvstoreIterator *kvs_it); +int 
kvstoreIteratorGetCurrentDictIndex(kvstoreIterator *kvs_it); +dictEntry *kvstoreIteratorNext(kvstoreIterator *kvs_it); + +/* Rehashing */ +void kvstoreTryResizeDicts(kvstore *kvs, int limit); +uint64_t kvstoreIncrementallyRehash(kvstore *kvs, uint64_t threshold_us); +size_t kvstoreOverheadHashtableLut(kvstore *kvs); +size_t kvstoreOverheadHashtableRehashing(kvstore *kvs); +unsigned long kvstoreDictRehashingCount(kvstore *kvs); + +/* Specific dict access by dict-index */ +unsigned long kvstoreDictSize(kvstore *kvs, int didx); +kvstoreDictIterator *kvstoreGetDictIterator(kvstore *kvs, int didx); +kvstoreDictIterator *kvstoreGetDictSafeIterator(kvstore *kvs, int didx); +void kvstoreReleaseDictIterator(kvstoreDictIterator *kvs_id); +dictEntry *kvstoreDictIteratorNext(kvstoreDictIterator *kvs_di); +dictEntry *kvstoreDictGetRandomKey(kvstore *kvs, int didx); +dictEntry *kvstoreDictGetFairRandomKey(kvstore *kvs, int didx); +dictEntry *kvstoreDictFindEntryByPtrAndHash(kvstore *kvs, int didx, const void *oldptr, uint64_t hash); +unsigned int kvstoreDictGetSomeKeys(kvstore *kvs, int didx, dictEntry **des, unsigned int count); +int kvstoreDictExpand(kvstore *kvs, int didx, unsigned long size); +unsigned long kvstoreDictScanDefrag(kvstore *kvs, int didx, unsigned long v, dictScanFunction *fn, dictDefragFunctions *defragfns, void *privdata); +typedef dict *(kvstoreDictLUTDefragFunction)(dict *d); +void kvstoreDictLUTDefrag(kvstore *kvs, kvstoreDictLUTDefragFunction *defragfn); +void *kvstoreDictFetchValue(kvstore *kvs, int didx, const void *key); +dictEntry *kvstoreDictFind(kvstore *kvs, int didx, void *key); +dictEntry *kvstoreDictAddRaw(kvstore *kvs, int didx, void *key, dictEntry **existing); +void kvstoreDictSetKey(kvstore *kvs, int didx, dictEntry* de, void *key); +void kvstoreDictSetVal(kvstore *kvs, int didx, dictEntry *de, void *val); +dictEntry *kvstoreDictTwoPhaseUnlinkFind(kvstore *kvs, int didx, const void *key, dictEntry ***plink, int *table_index); +void 
kvstoreDictTwoPhaseUnlinkFree(kvstore *kvs, int didx, dictEntry *he, dictEntry **plink, int table_index); +int kvstoreDictDelete(kvstore *kvs, int didx, const void *key); + +#ifdef REDIS_TEST +int kvstoreTest(int argc, char *argv[], int flags); +#endif + +#endif /* DICTARRAY_H_ */ diff --git a/src/latency.c b/src/latency.c index d46890e826f..db4c9044dd2 100644 --- a/src/latency.c +++ b/src/latency.c @@ -5,32 +5,11 @@ * * ---------------------------------------------------------------------------- * - * Copyright (c) 2014, Salvatore Sanfilippo + * Copyright (c) 2014-Present, Redis Ltd. * All rights reserved. * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Redis nor the names of its contributors may be used - * to endorse or promote products derived from this software without - * specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. + * Licensed under your choice of the Redis Source Available License 2.0 + * (RSALv2) or the Server Side Public License v1 (SSPLv1). */ #include "server.h" @@ -279,7 +258,7 @@ sds createLatencyReport(void) { /* Potentially commands. */ if (!strcasecmp(event,"command")) { - if (server.slowlog_log_slower_than < 0) { + if (server.slowlog_log_slower_than < 0 || server.slowlog_max_len == 0) { advise_slowlog_enabled = 1; advices++; } else if (server.slowlog_log_slower_than/1000 > diff --git a/src/latency.h b/src/latency.h index 13503d5c031..1951957c0af 100644 --- a/src/latency.h +++ b/src/latency.h @@ -3,32 +3,11 @@ * * ---------------------------------------------------------------------------- * - * Copyright (c) 2014, Salvatore Sanfilippo + * Copyright (c) 2014-Present, Redis Ltd. * All rights reserved. * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. 
- * * Neither the name of Redis nor the names of its contributors may be used - * to endorse or promote products derived from this software without - * specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. + * Licensed under your choice of the Redis Source Available License 2.0 + * (RSALv2) or the Server Side Public License v1 (SSPLv1). */ #ifndef __LATENCY_H diff --git a/src/lazyfree.c b/src/lazyfree.c index 8ac55f77750..2b98f9a06fc 100644 --- a/src/lazyfree.c +++ b/src/lazyfree.c @@ -2,6 +2,8 @@ #include "bio.h" #include "atomicvar.h" #include "functions.h" +#include "cluster.h" +#include "ebuckets.h" static redisAtomic size_t lazyfree_objects = 0; static redisAtomic size_t lazyfreed_objects = 0; @@ -19,14 +21,23 @@ void lazyfreeFreeObject(void *args[]) { * database which was substituted with a fresh one in the main thread * when the database was logically deleted. 
*/ void lazyfreeFreeDatabase(void *args[]) { - dict *ht1 = (dict *) args[0]; - dict *ht2 = (dict *) args[1]; - - size_t numkeys = dictSize(ht1); - dictRelease(ht1); - dictRelease(ht2); + kvstore *da1 = args[0]; + kvstore *da2 = args[1]; + ebuckets oldHfe = args[2]; + ebDestroy(&oldHfe, &hashExpireBucketsType, NULL); + size_t numkeys = kvstoreSize(da1); + kvstoreRelease(da1); + kvstoreRelease(da2); atomicDecr(lazyfree_objects,numkeys); atomicIncr(lazyfreed_objects,numkeys); + +#if defined(USE_JEMALLOC) + /* Only clear the current thread cache. + * Ignore the return call since this will fail if the tcache is disabled. */ + je_mallctl("thread.tcache.flush", NULL, NULL, NULL, 0); + + jemalloc_purge(); +#endif } /* Release the key tracking table. */ @@ -38,11 +49,22 @@ void lazyFreeTrackingTable(void *args[]) { atomicIncr(lazyfreed_objects,len); } +/* Release the error stats rax tree. */ +void lazyFreeErrors(void *args[]) { + rax *errors = args[0]; + size_t len = errors->numele; + raxFreeWithCallback(errors, zfree); + atomicDecr(lazyfree_objects,len); + atomicIncr(lazyfreed_objects,len); +} + /* Release the lua_scripts dict. */ void lazyFreeLuaScripts(void *args[]) { dict *lua_scripts = args[0]; + list *lua_scripts_lru_list = args[1]; + lua_State *lua = args[2]; long long len = dictSize(lua_scripts); - dictRelease(lua_scripts); + freeLuaScriptsSync(lua_scripts, lua_scripts_lru_list, lua); atomicDecr(lazyfree_objects,len); atomicIncr(lazyfreed_objects,len); } @@ -50,7 +72,7 @@ void lazyFreeLuaScripts(void *args[]) { /* Release the functions ctx. 
*/ void lazyFreeFunctionsCtx(void *args[]) { functionsLibCtx *functions_lib_ctx = args[0]; - size_t len = functionsLibCtxfunctionsLen(functions_lib_ctx); + size_t len = functionsLibCtxFunctionsLen(functions_lib_ctx); functionsLibCtxFree(functions_lib_ctx); atomicDecr(lazyfree_objects,len); atomicIncr(lazyfreed_objects,len); @@ -174,11 +196,19 @@ void freeObjAsync(robj *key, robj *obj, int dbid) { * create a new empty set of hash tables and scheduling the old ones for * lazy freeing. */ void emptyDbAsync(redisDb *db) { - dict *oldht1 = db->dict, *oldht2 = db->expires; - db->dict = dictCreate(&dbDictType); - db->expires = dictCreate(&dbExpiresDictType); - atomicIncr(lazyfree_objects,dictSize(oldht1)); - bioCreateLazyFreeJob(lazyfreeFreeDatabase,2,oldht1,oldht2); + int slot_count_bits = 0; + int flags = KVSTORE_ALLOCATE_DICTS_ON_DEMAND; + if (server.cluster_enabled) { + slot_count_bits = CLUSTER_SLOT_MASK_BITS; + flags |= KVSTORE_FREE_EMPTY_DICTS; + } + kvstore *oldkeys = db->keys, *oldexpires = db->expires; + ebuckets oldHfe = db->hexpires; + db->keys = kvstoreCreate(&dbDictType, slot_count_bits, flags); + db->expires = kvstoreCreate(&dbExpiresDictType, slot_count_bits, flags); + db->hexpires = ebCreate(); + atomicIncr(lazyfree_objects, kvstoreSize(oldkeys)); + bioCreateLazyFreeJob(lazyfreeFreeDatabase, 3, oldkeys, oldexpires, oldHfe); } /* Free the key tracking table. @@ -193,20 +223,33 @@ void freeTrackingRadixTreeAsync(rax *tracking) { } } -/* Free lua_scripts dict, if the dict is huge enough, free it in async way. */ -void freeLuaScriptsAsync(dict *lua_scripts) { +/* Free the error stats rax tree. + * If the rax tree is huge enough, free it in async way. */ +void freeErrorsRadixTreeAsync(rax *errors) { + /* Because this rax has only keys and no values so we use numnodes. 
*/ + if (errors->numnodes > LAZYFREE_THRESHOLD) { + atomicIncr(lazyfree_objects,errors->numele); + bioCreateLazyFreeJob(lazyFreeErrors,1,errors); + } else { + raxFreeWithCallback(errors, zfree); + } +} + +/* Free lua_scripts dict and lru list, if the dict is huge enough, free them in async way. + * Close lua interpreter, if there are a lot of lua scripts, close it in async way. */ +void freeLuaScriptsAsync(dict *lua_scripts, list *lua_scripts_lru_list, lua_State *lua) { if (dictSize(lua_scripts) > LAZYFREE_THRESHOLD) { atomicIncr(lazyfree_objects,dictSize(lua_scripts)); - bioCreateLazyFreeJob(lazyFreeLuaScripts,1,lua_scripts); + bioCreateLazyFreeJob(lazyFreeLuaScripts,3,lua_scripts,lua_scripts_lru_list,lua); } else { - dictRelease(lua_scripts); + freeLuaScriptsSync(lua_scripts, lua_scripts_lru_list, lua); } } /* Free functions ctx, if the functions ctx contains enough functions, free it in async way. */ void freeFunctionsAsync(functionsLibCtx *functions_lib_ctx) { - if (functionsLibCtxfunctionsLen(functions_lib_ctx) > LAZYFREE_THRESHOLD) { - atomicIncr(lazyfree_objects,functionsLibCtxfunctionsLen(functions_lib_ctx)); + if (functionsLibCtxFunctionsLen(functions_lib_ctx) > LAZYFREE_THRESHOLD) { + atomicIncr(lazyfree_objects,functionsLibCtxFunctionsLen(functions_lib_ctx)); bioCreateLazyFreeJob(lazyFreeFunctionsCtx,1,functions_lib_ctx); } else { functionsLibCtxFree(functions_lib_ctx); diff --git a/src/listpack.c b/src/listpack.c index ecc7e9f6fb8..5d9028e13d0 100644 --- a/src/listpack.c +++ b/src/listpack.c @@ -4,33 +4,11 @@ * * https://github.com/antirez/listpack * - * Copyright (c) 2017, Salvatore Sanfilippo - * Copyright (c) 2020, Redis Labs, Inc + * Copyright (c) 2017-Present, Redis Ltd. * All rights reserved. 
* - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Redis nor the names of its contributors may be used - * to endorse or promote products derived from this software without - * specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. + * Licensed under your choice of the Redis Source Available License 2.0 + * (RSALv2) or the Server Side Public License v1 (SSPLv1). */ #include @@ -169,7 +147,7 @@ int lpSafeToAdd(unsigned char* lp, size_t add) { * "utils.c", function string2ll(), and is copyright: * * Copyright(C) 2011, Pieter Noordhuis - * Copyright(C) 2011, Salvatore Sanfilippo + * Copyright(C) 2011-current, Redis Ltd. * * The function is released under the BSD 3-clause license. 
*/ @@ -267,51 +245,61 @@ unsigned char* lpShrinkToFit(unsigned char *lp) { static inline void lpEncodeIntegerGetType(int64_t v, unsigned char *intenc, uint64_t *enclen) { if (v >= 0 && v <= 127) { /* Single byte 0-127 integer. */ - intenc[0] = v; - *enclen = 1; + if (intenc != NULL) intenc[0] = v; + if (enclen != NULL) *enclen = 1; } else if (v >= -4096 && v <= 4095) { /* 13 bit integer. */ if (v < 0) v = ((int64_t)1<<13)+v; - intenc[0] = (v>>8)|LP_ENCODING_13BIT_INT; - intenc[1] = v&0xff; - *enclen = 2; + if (intenc != NULL) { + intenc[0] = (v>>8)|LP_ENCODING_13BIT_INT; + intenc[1] = v&0xff; + } + if (enclen != NULL) *enclen = 2; } else if (v >= -32768 && v <= 32767) { /* 16 bit integer. */ if (v < 0) v = ((int64_t)1<<16)+v; - intenc[0] = LP_ENCODING_16BIT_INT; - intenc[1] = v&0xff; - intenc[2] = v>>8; - *enclen = 3; + if (intenc != NULL) { + intenc[0] = LP_ENCODING_16BIT_INT; + intenc[1] = v&0xff; + intenc[2] = v>>8; + } + if (enclen != NULL) *enclen = 3; } else if (v >= -8388608 && v <= 8388607) { /* 24 bit integer. */ if (v < 0) v = ((int64_t)1<<24)+v; - intenc[0] = LP_ENCODING_24BIT_INT; - intenc[1] = v&0xff; - intenc[2] = (v>>8)&0xff; - intenc[3] = v>>16; - *enclen = 4; + if (intenc != NULL) { + intenc[0] = LP_ENCODING_24BIT_INT; + intenc[1] = v&0xff; + intenc[2] = (v>>8)&0xff; + intenc[3] = v>>16; + } + if (enclen != NULL) *enclen = 4; } else if (v >= -2147483648 && v <= 2147483647) { /* 32 bit integer. */ if (v < 0) v = ((int64_t)1<<32)+v; - intenc[0] = LP_ENCODING_32BIT_INT; - intenc[1] = v&0xff; - intenc[2] = (v>>8)&0xff; - intenc[3] = (v>>16)&0xff; - intenc[4] = v>>24; - *enclen = 5; + if (intenc != NULL) { + intenc[0] = LP_ENCODING_32BIT_INT; + intenc[1] = v&0xff; + intenc[2] = (v>>8)&0xff; + intenc[3] = (v>>16)&0xff; + intenc[4] = v>>24; + } + if (enclen != NULL) *enclen = 5; } else { /* 64 bit integer. 
*/ uint64_t uv = v; - intenc[0] = LP_ENCODING_64BIT_INT; - intenc[1] = uv&0xff; - intenc[2] = (uv>>8)&0xff; - intenc[3] = (uv>>16)&0xff; - intenc[4] = (uv>>24)&0xff; - intenc[5] = (uv>>32)&0xff; - intenc[6] = (uv>>40)&0xff; - intenc[7] = (uv>>48)&0xff; - intenc[8] = uv>>56; - *enclen = 9; + if (intenc != NULL) { + intenc[0] = LP_ENCODING_64BIT_INT; + intenc[1] = uv&0xff; + intenc[2] = (uv>>8)&0xff; + intenc[3] = (uv>>16)&0xff; + intenc[4] = (uv>>24)&0xff; + intenc[5] = (uv>>32)&0xff; + intenc[6] = (uv>>40)&0xff; + intenc[7] = (uv>>48)&0xff; + intenc[8] = uv>>56; + } + if (enclen != NULL) *enclen = 9; } } @@ -681,50 +669,47 @@ unsigned char *lpGetValue(unsigned char *p, unsigned int *slen, long long *lval) return vstr; } -/* Find pointer to the entry equal to the specified entry. Skip 'skip' entries - * between every comparison. Returns NULL when the field could not be found. */ -unsigned char *lpFind(unsigned char *lp, unsigned char *p, unsigned char *s, - uint32_t slen, unsigned int skip) { +/* This is just a wrapper to lpGet() that is able to get an integer from an entry directly. + * Returns 1 and stores the integer in 'lval' if the entry is an integer. + * Returns 0 if the entry is a string. */ +int lpGetIntegerValue(unsigned char *p, long long *lval) { + int64_t ele_len; + if (!lpGet(p, &ele_len, NULL)) { + *lval = ele_len; + return 1; + } + return 0; +} + +/* Find pointer to the entry with a comparator callback. + * + * 'cmp' is a comparator callback. If it returns zero, current entry pointer + * will be returned. 'user' is passed to this callback. + * Skip 'skip' entries between every comparison. + * Returns NULL when the field could not be found. */ +unsigned char *lpFindCb(unsigned char *lp, unsigned char *p, + void *user, lpCmp cmp, unsigned int skip) +{ int skipcnt = 0; - unsigned char vencoding = 0; unsigned char *value; - int64_t ll, vll; + int64_t ll; uint64_t entry_size = 123456789; /* initialized to avoid warning. 
*/ uint32_t lp_bytes = lpBytes(lp); - assert(p); + if (!p) + p = lpFirst(lp); + while (p) { if (skipcnt == 0) { value = lpGetWithSize(p, &ll, NULL, &entry_size); if (value) { /* check the value doesn't reach outside the listpack before accessing it */ assert(p >= lp + LP_HDR_SIZE && p + entry_size < lp + lp_bytes); - if (slen == ll && memcmp(value, s, slen) == 0) { - return p; - } - } else { - /* Find out if the searched field can be encoded. Note that - * we do it only the first time, once done vencoding is set - * to non-zero and vll is set to the integer value. */ - if (vencoding == 0) { - /* If the entry can be encoded as integer we set it to - * 1, else set it to UCHAR_MAX, so that we don't retry - * again the next time. */ - if (slen >= 32 || slen == 0 || !lpStringToInt64((const char*)s, slen, &vll)) { - vencoding = UCHAR_MAX; - } else { - vencoding = 1; - } - } - - /* Compare current entry with specified entry, do it only - * if vencoding != UCHAR_MAX because if there is no encoding - * possible for the field it can't be a valid integer. */ - if (vencoding != UCHAR_MAX && ll == vll) { - return p; - } } + if (cmp(lp, p, user, value, ll) == 0) + return p; + /* Reset skip count */ skipcnt = skip; p += entry_size; @@ -749,6 +734,62 @@ unsigned char *lpFind(unsigned char *lp, unsigned char *p, unsigned char *s, return NULL; } +struct lpFindArg { + unsigned char *s; /* Item to search */ + uint32_t slen; /* Item len */ + int vencoding; + int64_t vll; +}; + +/* Comparator function to find item */ +static inline int lpFindCmp(const unsigned char *lp, unsigned char *p, + void *user, unsigned char *s, long long slen) { + (void) lp; + (void) p; + struct lpFindArg *arg = user; + + if (s) { + if (slen == arg->slen && memcmp(arg->s, s, slen) == 0) { + return 0; + } + } else { + /* Find out if the searched field can be encoded. Note that + * we do it only the first time, once done vencoding is set + * to non-zero and vll is set to the integer value. 
*/ + if (arg->vencoding == 0) { + /* If the entry can be encoded as integer we set it to + * 1, else set it to UCHAR_MAX, so that we don't retry + * again the next time. */ + if (arg->slen >= 32 || arg->slen == 0 || !lpStringToInt64((const char*)arg->s, arg->slen, &arg->vll)) { + arg->vencoding = UCHAR_MAX; + } else { + arg->vencoding = 1; + } + } + + /* Compare current entry with specified entry, do it only + * if vencoding != UCHAR_MAX because if there is no encoding + * possible for the field it can't be a valid integer. */ + if (arg->vencoding != UCHAR_MAX && slen == arg->vll) { + return 0; + } + } + + return 1; +} + +/* Find pointer to the entry equal to the specified entry. Skip 'skip' entries + * between every comparison. Returns NULL when the field could not be found. */ +unsigned char *lpFind(unsigned char *lp, unsigned char *p, unsigned char *s, + uint32_t slen, unsigned int skip) +{ + struct lpFindArg arg = { + .s = s, + .slen = slen + }; + return lpFindCb(lp, p, &arg, lpFindCmp, skip); +} + /* Insert, delete or replace the specified string element 'elestr' of length * 'size' or integer element 'eleint' at the specified position 'p', with 'p' * being a listpack element pointer obtained with lpFirst(), lpLast(), lpNext(), @@ -926,6 +967,140 @@ unsigned char *lpInsert(unsigned char *lp, unsigned char *elestr, unsigned char return lp; } +/* Insert the specified elements with 'entries' and 'len' at the specified + * position 'p', with 'p' being a listpack element pointer obtained with + * lpFirst(), lpLast(), lpNext(), lpPrev() or lpSeek(). + * + * This is similar to lpInsert() but allows you to insert batch of entries in + * one call. This function is more efficient than inserting entries one by one + * as it does single realloc()/memmove() calls for all the entries. + * + * In each listpackEntry, if 'sval' is not null, it is assumed entry is string + * and 'sval' and 'slen' will be used. Otherwise, 'lval' will be used to append + * the integer entry. 
+ * + * The elements are inserted before or after the element pointed by 'p' + * depending on the 'where' argument, that can be LP_BEFORE or LP_AFTER. + * + * If 'newp' is not NULL, at the end of a successful call '*newp' will be set + * to the address of the element just added, so that it will be possible to + * continue an interaction with lpNext() and lpPrev(). + * + * Returns NULL on out of memory or when the listpack total length would exceed + * the max allowed size of 2^32-1, otherwise the new pointer to the listpack + * holding the new element is returned (and the old pointer passed is no longer + * considered valid). */ +unsigned char *lpBatchInsert(unsigned char *lp, unsigned char *p, int where, + listpackEntry *entries, unsigned int len, + unsigned char **newp) +{ + assert(where == LP_BEFORE || where == LP_AFTER); + assert(entries != NULL && len > 0); + + struct listpackInsertEntry { + int enctype; + uint64_t enclen; + unsigned char intenc[LP_MAX_INT_ENCODING_LEN]; + unsigned char backlen[LP_MAX_BACKLEN_SIZE]; + unsigned long backlen_size; + }; + + uint64_t addedlen = 0; /* The encoded length of the added elements. */ + struct listpackInsertEntry tmp[3]; /* Encoded entries */ + struct listpackInsertEntry *enc = tmp; + + if (len > sizeof(tmp) / sizeof(struct listpackInsertEntry)) { + /* If 'len' is larger than local buffer size, allocate on heap. */ + enc = zmalloc(len * sizeof(struct listpackInsertEntry)); + } + + /* If we need to insert after the current element, we just jump to the + * next element (that could be the EOF one) and handle the case of + * inserting before. So the function will actually deal with just one + * case: LP_BEFORE. 
*/ + if (where == LP_AFTER) { + p = lpSkip(p); + where = LP_BEFORE; + ASSERT_INTEGRITY(lp, p); + } + + for (unsigned int i = 0; i < len; i++) { + listpackEntry *e = &entries[i]; + if (e->sval) { + /* Calling lpEncodeGetType() results into the encoded version of the + * element to be stored into 'intenc' in case it is representable as + * an integer: in that case, the function returns LP_ENCODING_INT. + * Otherwise, if LP_ENCODING_STR is returned, we'll have to call + * lpEncodeString() to actually write the encoded string on place + * later. + * + * Whatever the returned encoding is, 'enclen' is populated with the + * length of the encoded element. */ + enc[i].enctype = lpEncodeGetType(e->sval, e->slen, + enc[i].intenc, &enc[i].enclen); + } else { + enc[i].enctype = LP_ENCODING_INT; + lpEncodeIntegerGetType(e->lval, enc[i].intenc, &enc[i].enclen); + } + addedlen += enc[i].enclen; + + /* We need to also encode the backward-parsable length of the element + * and append it to the end: this allows to traverse the listpack from + * the end to the start. */ + enc[i].backlen_size = lpEncodeBacklen(enc[i].backlen, enc[i].enclen); + addedlen += enc[i].backlen_size; + } + + uint64_t old_listpack_bytes = lpGetTotalBytes(lp); + uint64_t new_listpack_bytes = old_listpack_bytes + addedlen; + if (new_listpack_bytes > UINT32_MAX) return NULL; + + /* Store the offset of the element 'p', so that we can obtain its + * address again after a reallocation. */ + unsigned long poff = p-lp; + unsigned char *dst = lp + poff; /* May be updated after reallocation. */ + + /* Realloc before: we need more room. */ + if (new_listpack_bytes > old_listpack_bytes && + new_listpack_bytes > lp_malloc_size(lp)) { + if ((lp = lp_realloc(lp,new_listpack_bytes)) == NULL) return NULL; + dst = lp + poff; + } + + /* Setup the listpack relocating the elements to make the exact room + * we need to store the new ones. 
*/ + memmove(dst+addedlen,dst,old_listpack_bytes-poff); + + for (unsigned int i = 0; i < len; i++) { + listpackEntry *ent = &entries[i]; + + if (newp) + *newp = dst; + + if (enc[i].enctype == LP_ENCODING_INT) + memcpy(dst, enc[i].intenc, enc[i].enclen); + else + lpEncodeString(dst, ent->sval, ent->slen); + + dst += enc[i].enclen; + memcpy(dst, enc[i].backlen, enc[i].backlen_size); + dst += enc[i].backlen_size; + } + + /* Update header. */ + uint32_t num_elements = lpGetNumElements(lp); + if (num_elements != LP_HDR_NUMELE_UNKNOWN) { + if ((int64_t) len > (int64_t) LP_HDR_NUMELE_UNKNOWN - (int64_t) num_elements) + lpSetNumElements(lp, LP_HDR_NUMELE_UNKNOWN); + else + lpSetNumElements(lp,num_elements + len); + } + lpSetTotalBytes(lp,new_listpack_bytes); + if (enc != tmp) lp_free(enc); + + return lp; +} + /* This is just a wrapper for lpInsert() to directly use a string. */ unsigned char *lpInsertString(unsigned char *lp, unsigned char *s, uint32_t slen, unsigned char *p, int where, unsigned char **newp) @@ -973,6 +1148,20 @@ unsigned char *lpAppendInteger(unsigned char *lp, long long lval) { return lpInsertInteger(lp, lval, eofptr, LP_BEFORE, NULL); } +/* Append batch of entries to the listpack. + * + * This call is more efficient than multiple lpAppend() calls as it only does + * a single realloc() for all the given entries. + * + * In each listpackEntry, if 'sval' is not null, it is assumed entry is string + * and 'sval' and 'slen' will be used. Otherwise, 'lval' will be used to append + * the integer entry. */ +unsigned char *lpBatchAppend(unsigned char *lp, listpackEntry *entries, unsigned long len) { + uint64_t listpack_bytes = lpGetTotalBytes(lp); + unsigned char *eofptr = lp + listpack_bytes - 1; + return lpBatchInsert(lp, eofptr, LP_BEFORE, entries, len, NULL); +} + /* This is just a wrapper for lpInsert() to directly use a string to replace * the current element. 
The function returns the new listpack as return * value, and also updates the current cursor by updating '*p'. */ @@ -1221,13 +1410,17 @@ size_t lpBytes(unsigned char *lp) { return lpGetTotalBytes(lp); } -/* Returns the size of a listpack consisting of an integer repeated 'rep' times. */ -size_t lpEstimateBytesRepeatedInteger(long long lval, unsigned long rep) { +/* Returns the size 'lval' will require when encoded, in bytes */ +size_t lpEntrySizeInteger(long long lval) { uint64_t enclen; - unsigned char intenc[LP_MAX_INT_ENCODING_LEN]; - lpEncodeIntegerGetType(lval, intenc, &enclen); + lpEncodeIntegerGetType(lval, NULL, &enclen); unsigned long backlen = lpEncodeBacklen(NULL, enclen); - return LP_HDR_SIZE + (enclen + backlen) * rep + 1; + return enclen + backlen; +} + +/* Returns the size of a listpack consisting of an integer repeated 'rep' times. */ +size_t lpEstimateBytesRepeatedInteger(long long lval, unsigned long rep) { + return LP_HDR_SIZE + lpEntrySizeInteger(lval) * rep + 1; } /* Seek the specified element and returns the pointer to the seeked element. @@ -1430,15 +1623,20 @@ static inline void lpSaveValue(unsigned char *val, unsigned int len, int64_t lva /* Randomly select a pair of key and value. * total_count is a pre-computed length/2 of the listpack (to avoid calls to lpLength) * 'key' and 'val' are used to store the result key value pair. - * 'val' can be NULL if the value is not needed. */ -void lpRandomPair(unsigned char *lp, unsigned long total_count, listpackEntry *key, listpackEntry *val) { + * 'val' can be NULL if the value is not needed. + * 'tuple_len' indicates entry count of a single logical item. It should be 2 + * if listpack was saved as key-value pair or more for key-value-...(n_entries). 
*/ +void lpRandomPair(unsigned char *lp, unsigned long total_count, + listpackEntry *key, listpackEntry *val, int tuple_len) +{ unsigned char *p; + assert(tuple_len >= 2); + /* Avoid div by zero on corrupt listpack */ assert(total_count); - /* Generate even numbers, because listpack saved K-V pair */ - int r = (rand() % total_count) * 2; + int r = (rand() % total_count) * tuple_len; assert((p = lpSeek(lp, r))); key->sval = lpGetValue(p, &(key->slen), &(key->lval)); @@ -1488,26 +1686,31 @@ void lpRandomEntries(unsigned char *lp, unsigned int count, listpackEntry *entri /* Randomly select count of key value pairs and store into 'keys' and * 'vals' args. The order of the picked entries is random, and the selections * are non-unique (repetitions are possible). - * The 'vals' arg can be NULL in which case we skip these. */ -void lpRandomPairs(unsigned char *lp, unsigned int count, listpackEntry *keys, listpackEntry *vals) { + * The 'vals' arg can be NULL in which case we skip these. + * 'tuple_len' indicates entry count of a single logical item. It should be 2 + * if listpack was saved as key-value pair or more for key-value-...(n_entries). */ +void lpRandomPairs(unsigned char *lp, unsigned int count, listpackEntry *keys, listpackEntry *vals, int tuple_len) { unsigned char *p, *key, *value; unsigned int klen = 0, vlen = 0; long long klval = 0, vlval = 0; + assert(tuple_len >= 2); + /* Notice: the index member must be first due to the use in uintCompare */ typedef struct { unsigned int index; unsigned int order; } rand_pick; rand_pick *picks = lp_malloc(sizeof(rand_pick)*count); - unsigned int total_size = lpLength(lp)/2; + unsigned int total_size = lpLength(lp)/tuple_len; /* Avoid div by zero on corrupt listpack */ assert(total_size); /* create a pool of random indexes (some may be duplicate). 
*/ for (unsigned int i = 0; i < count; i++) { - picks[i].index = (rand() % total_size) * 2; /* Generate even indexes */ + /* Generate indexes that key exist at */ + picks[i].index = (rand() % total_size) * tuple_len; /* keep track of the order we picked them */ picks[i].order = i; } @@ -1529,8 +1732,11 @@ void lpRandomPairs(unsigned char *lp, unsigned int count, listpackEntry *keys, l lpSaveValue(value, vlen, vlval, &vals[storeorder]); pickindex++; } - lpindex += 2; - p = lpNext(lp, p); + lpindex += tuple_len; + + for (int i = 0; i < tuple_len - 1; i++) { + p = lpNext(lp, p); + } } lp_free(picks); @@ -1540,13 +1746,20 @@ void lpRandomPairs(unsigned char *lp, unsigned int count, listpackEntry *keys, l * 'vals' args. The selections are unique (no repetitions), and the order of * the picked entries is NOT-random. * The 'vals' arg can be NULL in which case we skip these. + * 'tuple_len' indicates entry count of a single logical item. It should be 2 + * if listpack was saved as key-value pair or more for key-value-...(n_entries). * The return value is the number of items picked which can be lower than the * requested count if the listpack doesn't hold enough pairs. 
*/ -unsigned int lpRandomPairsUnique(unsigned char *lp, unsigned int count, listpackEntry *keys, listpackEntry *vals) { +unsigned int lpRandomPairsUnique(unsigned char *lp, unsigned int count, + listpackEntry *keys, listpackEntry *vals, + int tuple_len) +{ + assert(tuple_len >= 2); + unsigned char *p, *key; unsigned int klen = 0; long long klval = 0; - unsigned int total_size = lpLength(lp)/2; + unsigned int total_size = lpLength(lp)/tuple_len; unsigned int index = 0; if (count > total_size) count = total_size; @@ -1554,7 +1767,7 @@ unsigned int lpRandomPairsUnique(unsigned char *lp, unsigned int count, listpack p = lpFirst(lp); unsigned int picked = 0, remaining = count; while (picked < count && p) { - assert((p = lpNextRandom(lp, p, &index, remaining, 1))); + assert((p = lpNextRandom(lp, p, &index, remaining, tuple_len))); key = lpGetValue(p, &klen, &klval); lpSaveValue(key, klen, klval, &keys[picked]); assert((p = lpNext(lp, p))); @@ -1576,8 +1789,9 @@ unsigned int lpRandomPairsUnique(unsigned char *lp, unsigned int count, listpack * the end of the list. The 'index' needs to be initialized according to the * current zero-based index matching the position of the starting element 'p' * and is updated to match the returned element's zero-based index. If - * 'even_only' is nonzero, an element with an even index is picked, which is - * useful if the listpack represents a key-value pair sequence. + * 'tuple_len' indicates entry count of a single logical item. e.g. This is + * useful if listpack represents key-value pairs. In this case, tuple_len should + * be two and even indexes will be picked. * * Note that this function can return p. 
In order to skip the previously * returned element, you need to call lpNext() or lpDelete() after each call to @@ -1587,7 +1801,7 @@ unsigned int lpRandomPairsUnique(unsigned char *lp, unsigned int count, listpack * p = lpFirst(lp); * i = 0; * while (remaining > 0) { - * p = lpNextRandom(lp, p, &i, remaining--, 0); + * p = lpNextRandom(lp, p, &i, remaining--, 1); * * // ... Do stuff with p ... * @@ -1596,8 +1810,9 @@ unsigned int lpRandomPairsUnique(unsigned char *lp, unsigned int count, listpack * } */ unsigned char *lpNextRandom(unsigned char *lp, unsigned char *p, unsigned int *index, - unsigned int remaining, int even_only) + unsigned int remaining, int tuple_len) { + assert(tuple_len > 0); /* To only iterate once, every time we try to pick a member, the probability * we pick it is the quotient of the count left we want to pick and the * count still we haven't visited. This way, we could make every member be @@ -1605,15 +1820,14 @@ unsigned char *lpNextRandom(unsigned char *lp, unsigned char *p, unsigned int *i unsigned int i = *index; unsigned int total_size = lpLength(lp); while (i < total_size && p != NULL) { - if (even_only && i % 2 != 0) { + if (i % tuple_len != 0) { p = lpNext(lp, p); i++; continue; } /* Do we pick this element? */ - unsigned int available = total_size - i; - if (even_only) available /= 2; + unsigned int available = (total_size - i) / tuple_len; double randomDouble = ((double)rand()) / RAND_MAX; double threshold = ((double)remaining) / available; if (randomDouble <= threshold) { @@ -1809,6 +2023,24 @@ static int lpValidation(unsigned char *p, unsigned int head_count, void *userdat return ret; } +static int lpFindCbCmp(const unsigned char *lp, unsigned char *p, void *user, unsigned char *s, long long slen) { + assert(lp); + assert(p); + + char *n = user; + + if (!s) { + int64_t sval; + if (lpStringToInt64((const char*)n, strlen(n), &sval)) + return slen == sval ? 
0 : 1; + } else { + if (strlen(n) == (size_t) slen && memcmp(n, s, slen) == 0) + return 0; + } + + return 1; +} + int listpackTest(int argc, char *argv[], int flags) { UNUSED(argc); UNUSED(argv); @@ -2053,6 +2285,111 @@ int listpackTest(int argc, char *argv[], int flags) { zfree(lp); } + TEST("Batch append") { + listpackEntry ent[6] = { + {.sval = (unsigned char*)mixlist[0], .slen = strlen(mixlist[0])}, + {.sval = (unsigned char*)mixlist[1], .slen = strlen(mixlist[1])}, + {.sval = (unsigned char*)mixlist[2], .slen = strlen(mixlist[2])}, + {.lval = 4294967296}, + {.sval = (unsigned char*)mixlist[3], .slen = strlen(mixlist[3])}, + {.lval = -100} + }; + + lp = lpNew(0); + lp = lpBatchAppend(lp, ent, 2); + verifyEntry(lpSeek(lp, 0), ent[0].sval, ent[0].slen); + verifyEntry(lpSeek(lp, 1), ent[1].sval, ent[1].slen); + assert(lpLength(lp) == 2); + + lp = lpBatchAppend(lp, &ent[2], 1); + verifyEntry(lpSeek(lp, 0), ent[0].sval, ent[0].slen); + verifyEntry(lpSeek(lp, 1), ent[1].sval, ent[1].slen); + verifyEntry(lpSeek(lp, 2), ent[2].sval, ent[2].slen); + assert(lpLength(lp) == 3); + + lp = lpDeleteRange(lp, 1, 1); + verifyEntry(lpSeek(lp, 0), ent[0].sval, ent[0].slen); + verifyEntry(lpSeek(lp, 1), ent[2].sval, ent[2].slen); + assert(lpLength(lp) == 2); + + lp = lpBatchAppend(lp, &ent[3], 3); + verifyEntry(lpSeek(lp, 0), ent[0].sval, ent[0].slen); + verifyEntry(lpSeek(lp, 1), ent[2].sval, ent[2].slen); + verifyEntry(lpSeek(lp, 2), (unsigned char*) "4294967296", 10); + verifyEntry(lpSeek(lp, 3), ent[4].sval, ent[4].slen); + verifyEntry(lpSeek(lp, 4), (unsigned char*) "-100", 4); + assert(lpLength(lp) == 5); + + lp = lpDeleteRange(lp, 1, 3); + verifyEntry(lpSeek(lp, 0), ent[0].sval, ent[0].slen); + verifyEntry(lpSeek(lp, 1), (unsigned char*) "-100", 4); + assert(lpLength(lp) == 2); + + lpFree(lp); + } + + TEST("Batch insert") { + lp = lpNew(0); + listpackEntry ent[6] = { + {.sval = (unsigned char*)mixlist[0], .slen = strlen(mixlist[0])}, + {.sval = (unsigned char*)mixlist[1], 
.slen = strlen(mixlist[1])}, + {.sval = (unsigned char*)mixlist[2], .slen = strlen(mixlist[2])}, + {.lval = 4294967296}, + {.sval = (unsigned char*)mixlist[3], .slen = strlen(mixlist[3])}, + {.lval = -100} + }; + + lp = lpBatchAppend(lp, ent, 4); + assert(lpLength(lp) == 4); + verifyEntry(lpSeek(lp, 0), ent[0].sval, ent[0].slen); + verifyEntry(lpSeek(lp, 1), ent[1].sval, ent[1].slen); + verifyEntry(lpSeek(lp, 2), ent[2].sval, ent[2].slen); + verifyEntry(lpSeek(lp, 3), (unsigned char*)"4294967296", 10); + + /* Insert with LP_BEFORE */ + p = lpSeek(lp, 3); + lp = lpBatchInsert(lp, p, LP_BEFORE, &ent[4], 2, &p); + verifyEntry(p, (unsigned char*)"-100", 4); + assert(lpLength(lp) == 6); + verifyEntry(lpSeek(lp, 0), ent[0].sval, ent[0].slen); + verifyEntry(lpSeek(lp, 1), ent[1].sval, ent[1].slen); + verifyEntry(lpSeek(lp, 2), ent[2].sval, ent[2].slen); + verifyEntry(lpSeek(lp, 3), ent[4].sval, ent[4].slen); + verifyEntry(lpSeek(lp, 4), (unsigned char*)"-100", 4); + verifyEntry(lpSeek(lp, 5), (unsigned char*)"4294967296", 10); + + lp = lpDeleteRange(lp, 1, 2); + assert(lpLength(lp) == 4); + verifyEntry(lpSeek(lp, 0), ent[0].sval, ent[0].slen); + verifyEntry(lpSeek(lp, 1), ent[4].sval, ent[4].slen); + verifyEntry(lpSeek(lp, 2), (unsigned char*)"-100", 4); + verifyEntry(lpSeek(lp, 3), (unsigned char*)"4294967296", 10); + + /* Insert with LP_AFTER */ + p = lpSeek(lp, 0); + lp = lpBatchInsert(lp, p, LP_AFTER, &ent[1], 2, &p); + verifyEntry(p, ent[2].sval, ent[2].slen); + assert(lpLength(lp) == 6); + verifyEntry(lpSeek(lp, 0), ent[0].sval, ent[0].slen); + verifyEntry(lpSeek(lp, 1), ent[1].sval, ent[1].slen); + verifyEntry(lpSeek(lp, 2), ent[2].sval, ent[2].slen); + verifyEntry(lpSeek(lp, 3), ent[4].sval, ent[4].slen); + verifyEntry(lpSeek(lp, 4), (unsigned char*)"-100", 4); + verifyEntry(lpSeek(lp, 5), (unsigned char*)"4294967296", 10); + + lp = lpDeleteRange(lp, 2, 4); + assert(lpLength(lp) == 2); + p = lpSeek(lp, 1); + lp = lpBatchInsert(lp, p, LP_AFTER, &ent[2], 1, &p); + 
verifyEntry(p, ent[2].sval, ent[2].slen); + assert(lpLength(lp) == 3); + verifyEntry(lpSeek(lp, 0), ent[0].sval, ent[0].slen); + verifyEntry(lpSeek(lp, 1), ent[1].sval, ent[1].slen); + verifyEntry(lpSeek(lp, 2), ent[2].sval, ent[2].slen); + + lpFree(lp); + } + TEST("Batch delete") { unsigned char *lp = createList(); /* char *mixlist[] = {"hello", "foo", "quux", "1024"} */ assert(lpLength(lp) == 4); /* Pre-condition */ @@ -2232,7 +2569,7 @@ int listpackTest(int argc, char *argv[], int flags) { unsigned index = 0; while (remaining > 0) { assert(p != NULL); - p = lpNextRandom(lp, p, &index, remaining--, 0); + p = lpNextRandom(lp, p, &index, remaining--, 1); assert(p != NULL); assert(p != prev); prev = p; @@ -2248,7 +2585,7 @@ int listpackTest(int argc, char *argv[], int flags) { unsigned i = 0; /* Pick from empty listpack returns NULL. */ - assert(lpNextRandom(lp, NULL, &i, 2, 0) == NULL); + assert(lpNextRandom(lp, NULL, &i, 2, 1) == NULL); /* Add some elements and find their pointers within the listpack. */ lp = lpAppend(lp, (unsigned char *)"abc", 3); @@ -2261,19 +2598,19 @@ int listpackTest(int argc, char *argv[], int flags) { assert(lpNext(lp, p2) == NULL); /* Pick zero elements returns NULL. */ - i = 0; assert(lpNextRandom(lp, lpFirst(lp), &i, 0, 0) == NULL); + i = 0; assert(lpNextRandom(lp, lpFirst(lp), &i, 0, 1) == NULL); /* Pick all returns all. */ - i = 0; assert(lpNextRandom(lp, p0, &i, 3, 0) == p0 && i == 0); - i = 1; assert(lpNextRandom(lp, p1, &i, 2, 0) == p1 && i == 1); - i = 2; assert(lpNextRandom(lp, p2, &i, 1, 0) == p2 && i == 2); + i = 0; assert(lpNextRandom(lp, p0, &i, 3, 1) == p0 && i == 0); + i = 1; assert(lpNextRandom(lp, p1, &i, 2, 1) == p1 && i == 1); + i = 2; assert(lpNextRandom(lp, p2, &i, 1, 1) == p2 && i == 2); /* Pick more than one when there's only one left returns the last one. 
*/ - i = 2; assert(lpNextRandom(lp, p2, &i, 42, 0) == p2 && i == 2); + i = 2; assert(lpNextRandom(lp, p2, &i, 42, 1) == p2 && i == 2); /* Pick all even elements returns p0 and p2. */ - i = 0; assert(lpNextRandom(lp, p0, &i, 10, 1) == p0 && i == 0); - i = 1; assert(lpNextRandom(lp, p1, &i, 10, 1) == p2 && i == 2); + i = 0; assert(lpNextRandom(lp, p0, &i, 10, 2) == p0 && i == 0); + i = 1; assert(lpNextRandom(lp, p1, &i, 10, 2) == p2 && i == 2); /* Don't crash even for bad index. */ for (int j = 0; j < 100; j++) { @@ -2286,7 +2623,7 @@ int listpackTest(int argc, char *argv[], int flags) { } i = j % 7; unsigned int remaining = j % 5; - p = lpNextRandom(lp, p, &i, remaining, 0); + p = lpNextRandom(lp, p, &i, remaining, 1); assert(p == p0 || p == p1 || p == p2 || p == NULL); } lpFree(lp); @@ -2297,7 +2634,7 @@ int listpackTest(int argc, char *argv[], int flags) { unsigned char *lp = lpNew(0); lp = lpAppend(lp, (unsigned char*)"abc", 3); lp = lpAppend(lp, (unsigned char*)"123", 3); - lpRandomPair(lp, 1, &key, &val); + lpRandomPair(lp, 1, &key, &val, 2); assert(memcmp(key.sval, "abc", key.slen) == 0); assert(val.lval == 123); lpFree(lp); @@ -2310,7 +2647,7 @@ int listpackTest(int argc, char *argv[], int flags) { lp = lpAppend(lp, (unsigned char*)"123", 3); lp = lpAppend(lp, (unsigned char*)"456", 3); lp = lpAppend(lp, (unsigned char*)"def", 3); - lpRandomPair(lp, 2, &key, &val); + lpRandomPair(lp, 2, &key, &val, 2); if (key.sval) { assert(!memcmp(key.sval, "abc", key.slen)); assert(key.slen == 3); @@ -2323,6 +2660,42 @@ int listpackTest(int argc, char *argv[], int flags) { lpFree(lp); } + TEST("Random pair with tuple_len 3") { + listpackEntry key, val; + unsigned char *lp = lpNew(0); + lp = lpAppend(lp, (unsigned char*)"abc", 3); + lp = lpAppend(lp, (unsigned char*)"123", 3); + lp = lpAppend(lp, (unsigned char*)"xxx", 3); + lp = lpAppend(lp, (unsigned char*)"456", 3); + lp = lpAppend(lp, (unsigned char*)"def", 3); + lp = lpAppend(lp, (unsigned char*)"xxx", 3); + lp = 
lpAppend(lp, (unsigned char*)"281474976710655", 15); + lp = lpAppend(lp, (unsigned char*)"789", 3); + lp = lpAppend(lp, (unsigned char*)"xxx", 3); + + for (int i = 0; i < 5; i++) { + lpRandomPair(lp, 3, &key, &val, 3); + if (key.sval) { + if (!memcmp(key.sval, "abc", key.slen)) { + assert(key.slen == 3); + assert(val.lval == 123); + } else { + assert(0); + }; + } + if (!key.sval) { + if (key.lval == 456) + assert(!memcmp(val.sval, "def", val.slen)); + else if (key.lval == 281474976710655LL) + assert(val.lval == 789); + else + assert(0); + } + } + + lpFree(lp); + } + TEST("Random pairs with one element") { int count = 5; unsigned char *lp = lpNew(0); @@ -2331,7 +2704,7 @@ int listpackTest(int argc, char *argv[], int flags) { lp = lpAppend(lp, (unsigned char*)"abc", 3); lp = lpAppend(lp, (unsigned char*)"123", 3); - lpRandomPairs(lp, count, keys, vals); + lpRandomPairs(lp, count, keys, vals, 2); assert(memcmp(keys[4].sval, "abc", keys[4].slen) == 0); assert(vals[4].lval == 123); zfree(keys); @@ -2349,7 +2722,7 @@ int listpackTest(int argc, char *argv[], int flags) { lp = lpAppend(lp, (unsigned char*)"123", 3); lp = lpAppend(lp, (unsigned char*)"456", 3); lp = lpAppend(lp, (unsigned char*)"def", 3); - lpRandomPairs(lp, count, keys, vals); + lpRandomPairs(lp, count, keys, vals, 2); for (int i = 0; i < count; i++) { if (keys[i].sval) { assert(!memcmp(keys[i].sval, "abc", keys[i].slen)); @@ -2366,6 +2739,47 @@ int listpackTest(int argc, char *argv[], int flags) { lpFree(lp); } + TEST("Random pairs with many elements and tuple_len 3") { + int count = 5; + lp = lpNew(0); + listpackEntry *keys = zcalloc(sizeof(listpackEntry) * count); + listpackEntry *vals = zcalloc(sizeof(listpackEntry) * count); + + lp = lpAppend(lp, (unsigned char*)"abc", 3); + lp = lpAppend(lp, (unsigned char*)"123", 3); + lp = lpAppend(lp, (unsigned char*)"xxx", 3); + lp = lpAppend(lp, (unsigned char*)"456", 3); + lp = lpAppend(lp, (unsigned char*)"def", 3); + lp = lpAppend(lp, (unsigned char*)"xxx", 
3); + lp = lpAppend(lp, (unsigned char*)"281474976710655", 15); + lp = lpAppend(lp, (unsigned char*)"789", 3); + lp = lpAppend(lp, (unsigned char*)"xxx", 3); + + lpRandomPairs(lp, count, keys, vals, 3); + for (int i = 0; i < count; i++) { + if (keys[i].sval) { + if (!memcmp(keys[i].sval, "abc", keys[i].slen)) { + assert(keys[i].slen == 3); + assert(vals[i].lval == 123); + } else { + assert(0); + }; + } + if (!keys[i].sval) { + if (keys[i].lval == 456) + assert(!memcmp(vals[i].sval, "def", vals[i].slen)); + else if (keys[i].lval == 281474976710655LL) + assert(vals[i].lval == 789); + else + assert(0); + } + } + + zfree(keys); + zfree(vals); + lpFree(lp); + } + TEST("Random pairs unique with one element") { unsigned picked; int count = 5; @@ -2375,7 +2789,7 @@ int listpackTest(int argc, char *argv[], int flags) { lp = lpAppend(lp, (unsigned char*)"abc", 3); lp = lpAppend(lp, (unsigned char*)"123", 3); - picked = lpRandomPairsUnique(lp, count, keys, vals); + picked = lpRandomPairsUnique(lp, count, keys, vals, 2); assert(picked == 1); assert(memcmp(keys[0].sval, "abc", keys[0].slen) == 0); assert(vals[0].lval == 123); @@ -2395,7 +2809,7 @@ int listpackTest(int argc, char *argv[], int flags) { lp = lpAppend(lp, (unsigned char*)"123", 3); lp = lpAppend(lp, (unsigned char*)"456", 3); lp = lpAppend(lp, (unsigned char*)"def", 3); - picked = lpRandomPairsUnique(lp, count, keys, vals); + picked = lpRandomPairsUnique(lp, count, keys, vals, 2); assert(picked == 2); for (int i = 0; i < 2; i++) { if (keys[i].sval) { @@ -2413,6 +2827,47 @@ int listpackTest(int argc, char *argv[], int flags) { lpFree(lp); } + TEST("Random pairs unique with many elements and tuple_len 3") { + unsigned picked; + int count = 5; + lp = lpNew(0); + listpackEntry *keys = zmalloc(sizeof(listpackEntry) * count); + listpackEntry *vals = zmalloc(sizeof(listpackEntry) * count); + + lp = lpAppend(lp, (unsigned char*)"abc", 3); + lp = lpAppend(lp, (unsigned char*)"123", 3); + lp = lpAppend(lp, (unsigned 
char*)"xxx", 3); + lp = lpAppend(lp, (unsigned char*)"456", 3); + lp = lpAppend(lp, (unsigned char*)"def", 3); + lp = lpAppend(lp, (unsigned char*)"xxx", 3); + lp = lpAppend(lp, (unsigned char*)"281474976710655", 15); + lp = lpAppend(lp, (unsigned char*)"789", 3); + lp = lpAppend(lp, (unsigned char*)"xxx", 3); + picked = lpRandomPairsUnique(lp, count, keys, vals, 3); + assert(picked == 3); + for (int i = 0; i < 3; i++) { + if (keys[i].sval) { + if (!memcmp(keys[i].sval, "abc", keys[i].slen)) { + assert(keys[i].slen == 3); + assert(vals[i].lval == 123); + } else { + assert(0); + }; + } + if (!keys[i].sval) { + if (keys[i].lval == 456) + assert(!memcmp(vals[i].sval, "def", vals[i].slen)); + else if (keys[i].lval == 281474976710655LL) + assert(vals[i].lval == 789); + else + assert(0); + } + } + zfree(keys); + zfree(vals); + lpFree(lp); + } + TEST("push various encodings") { lp = lpNew(0); @@ -2471,6 +2926,21 @@ int listpackTest(int argc, char *argv[], int flags) { lpFree(lp); } + TEST("Test lpFindCb") { + lp = createList(); /* "hello", "foo", "quux", "1024" */ + assert(lpFindCb(lp, lpFirst(lp), "abc", lpFindCbCmp, 0) == NULL); + verifyEntry(lpFindCb(lp, NULL, "hello", lpFindCbCmp, 0), (unsigned char*)"hello", 5); + verifyEntry(lpFindCb(lp, NULL, "1024", lpFindCbCmp, 0), (unsigned char*)"1024", 4); + verifyEntry(lpFindCb(lp, NULL, "quux", lpFindCbCmp, 0), (unsigned char*)"quux", 4); + verifyEntry(lpFindCb(lp, NULL, "foo", lpFindCbCmp, 0), (unsigned char*)"foo", 3); + lpFree(lp); + + lp = lpNew(0); + assert(lpFindCb(lp, lpFirst(lp), "hello", lpFindCbCmp, 0) == NULL); + assert(lpFindCb(lp, lpFirst(lp), "1024", lpFindCbCmp, 0) == NULL); + lpFree(lp); + } + TEST("Test lpValidateIntegrity") { lp = createList(); long count = 0; @@ -2493,6 +2963,26 @@ int listpackTest(int argc, char *argv[], int flags) { lpFree(lp); } + TEST("Test number of elements exceeds LP_HDR_NUMELE_UNKNOWN with batch insert") { + listpackEntry ent[2] = { + {.sval = (unsigned char*)mixlist[0], .slen = 
strlen(mixlist[0])}, + {.sval = (unsigned char*)mixlist[1], .slen = strlen(mixlist[1])} + }; + + lp = lpNew(0); + for (int i = 0; i < (LP_HDR_NUMELE_UNKNOWN/2) + 1; i++) + lp = lpBatchAppend(lp, ent, 2); + + assert(lpGetNumElements(lp) == LP_HDR_NUMELE_UNKNOWN); + assert(lpLength(lp) == LP_HDR_NUMELE_UNKNOWN+1); + + lp = lpDeleteRange(lp, -2, 2); + assert(lpGetNumElements(lp) == LP_HDR_NUMELE_UNKNOWN); + assert(lpLength(lp) == LP_HDR_NUMELE_UNKNOWN-1); + assert(lpGetNumElements(lp) == LP_HDR_NUMELE_UNKNOWN-1); /* update length after lpLength */ + lpFree(lp); + } + TEST("Stress with random payloads of different encoding") { unsigned long long start = usec(); int i,j,len,where; diff --git a/src/listpack.h b/src/listpack.h index a60f089f9cd..c9fbc56241b 100644 --- a/src/listpack.h +++ b/src/listpack.h @@ -4,32 +4,11 @@ * * https://github.com/antirez/listpack * - * Copyright (c) 2017, Salvatore Sanfilippo + * Copyright (c) 2017-Present, Redis Ltd. * All rights reserved. * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Redis nor the names of its contributors may be used - * to endorse or promote products derived from this software without - * specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. + * Licensed under your choice of the Redis Source Available License 2.0 + * (RSALv2) or the Server Side Public License v1 (SSPLv1). */ #ifndef __LISTPACK_H @@ -70,18 +49,25 @@ unsigned char *lpReplaceInteger(unsigned char *lp, unsigned char **p, long long unsigned char *lpDelete(unsigned char *lp, unsigned char *p, unsigned char **newp); unsigned char *lpDeleteRangeWithEntry(unsigned char *lp, unsigned char **p, unsigned long num); unsigned char *lpDeleteRange(unsigned char *lp, long index, unsigned long num); +unsigned char *lpBatchAppend(unsigned char *lp, listpackEntry *entries, unsigned long len); +unsigned char *lpBatchInsert(unsigned char *lp, unsigned char *p, int where, + listpackEntry *entries, unsigned int len, unsigned char **newp); unsigned char *lpBatchDelete(unsigned char *lp, unsigned char **ps, unsigned long count); unsigned char *lpMerge(unsigned char **first, unsigned char **second); unsigned char *lpDup(unsigned char *lp); unsigned long lpLength(unsigned char *lp); unsigned char *lpGet(unsigned char *p, int64_t *count, unsigned char *intbuf); unsigned char *lpGetValue(unsigned char *p, unsigned int *slen, long long *lval); +int lpGetIntegerValue(unsigned char *p, long long *lval); unsigned char *lpFind(unsigned char *lp, unsigned char *p, unsigned char *s, uint32_t slen, unsigned int skip); +typedef int (*lpCmp)(const unsigned char *lp, unsigned char *p, void *user, unsigned char *s, long long slen); +unsigned 
char *lpFindCb(unsigned char *lp, unsigned char *p, void *user, lpCmp cmp, unsigned int skip); unsigned char *lpFirst(unsigned char *lp); unsigned char *lpLast(unsigned char *lp); unsigned char *lpNext(unsigned char *lp, unsigned char *p); unsigned char *lpPrev(unsigned char *lp, unsigned char *p); size_t lpBytes(unsigned char *lp); +size_t lpEntrySizeInteger(long long lval); size_t lpEstimateBytesRepeatedInteger(long long lval, unsigned long rep); unsigned char *lpSeek(unsigned char *lp, long index); typedef int (*listpackValidateEntryCB)(unsigned char *p, unsigned int head_count, void *userdata); @@ -90,12 +76,15 @@ int lpValidateIntegrity(unsigned char *lp, size_t size, int deep, unsigned char *lpValidateFirst(unsigned char *lp); int lpValidateNext(unsigned char *lp, unsigned char **pp, size_t lpbytes); unsigned int lpCompare(unsigned char *p, unsigned char *s, uint32_t slen); -void lpRandomPair(unsigned char *lp, unsigned long total_count, listpackEntry *key, listpackEntry *val); -void lpRandomPairs(unsigned char *lp, unsigned int count, listpackEntry *keys, listpackEntry *vals); -unsigned int lpRandomPairsUnique(unsigned char *lp, unsigned int count, listpackEntry *keys, listpackEntry *vals); +void lpRandomPair(unsigned char *lp, unsigned long total_count, + listpackEntry *key, listpackEntry *val, int tuple_len); +void lpRandomPairs(unsigned char *lp, unsigned int count, + listpackEntry *keys, listpackEntry *vals, int tuple_len); +unsigned int lpRandomPairsUnique(unsigned char *lp, unsigned int count, + listpackEntry *keys, listpackEntry *vals, int tuple_len); void lpRandomEntries(unsigned char *lp, unsigned int count, listpackEntry *entries); unsigned char *lpNextRandom(unsigned char *lp, unsigned char *p, unsigned int *index, - unsigned int remaining, int even_only); + unsigned int remaining, int tuple_len); int lpSafeToAdd(unsigned char* lp, size_t add); void lpRepr(unsigned char *lp); diff --git a/src/listpack_malloc.h b/src/listpack_malloc.h index 
a8a81c35e03..55c8cf5be2e 100644 --- a/src/listpack_malloc.h +++ b/src/listpack_malloc.h @@ -1,32 +1,11 @@ /* Listpack -- A lists of strings serialization format * https://github.com/antirez/listpack * - * Copyright (c) 2017, Salvatore Sanfilippo + * Copyright (c) 2017-Present, Redis Ltd. * All rights reserved. * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Redis nor the names of its contributors may be used - * to endorse or promote products derived from this software without - * specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. + * Licensed under your choice of the Redis Source Available License 2.0 + * (RSALv2) or the Server Side Public License v1 (SSPLv1). */ /* Allocator selection. 
diff --git a/src/localtime.c b/src/localtime.c index 1cefdfa88cd..7f014cefcb4 100644 --- a/src/localtime.c +++ b/src/localtime.c @@ -1,30 +1,9 @@ /* - * Copyright (c) 2018, Salvatore Sanfilippo + * Copyright (c) 2018-Present, Redis Ltd. * All rights reserved. * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Redis nor the names of its contributors may be used - * to endorse or promote products derived from this software without - * specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. + * Licensed under your choice of the Redis Source Available License 2.0 + * (RSALv2) or the Server Side Public License v1 (SSPLv1). 
*/ #include diff --git a/src/logreqres.c b/src/logreqres.c index 6e7621d35db..a18bf3efb4e 100644 --- a/src/logreqres.c +++ b/src/logreqres.c @@ -1,30 +1,9 @@ /* - * Copyright (c) 2021, Redis Ltd. + * Copyright (c) 2021-Present, Redis Ltd. * All rights reserved. * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Redis nor the names of its contributors may be used - * to endorse or promote products derived from this software without - * specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. + * Licensed under your choice of the Redis Source Available License 2.0 + * (RSALv2) or the Server Side Public License v1 (SSPLv1). 
*/ /* This file implements the interface of logging clients' requests and diff --git a/src/lolwut.c b/src/lolwut.c index c014840e9af..34defdb70fc 100644 --- a/src/lolwut.c +++ b/src/lolwut.c @@ -1,30 +1,9 @@ /* - * Copyright (c) 2018, Salvatore Sanfilippo + * Copyright (c) 2018-Present, Redis Ltd. * All rights reserved. * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Redis nor the names of its contributors may be used - * to endorse or promote products derived from this software without - * specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. + * Licensed under your choice of the Redis Source Available License 2.0 + * (RSALv2) or the Server Side Public License v1 (SSPLv1). 
* * ---------------------------------------------------------------------------- * diff --git a/src/lolwut.h b/src/lolwut.h index 682d00531f6..97471ac55e0 100644 --- a/src/lolwut.h +++ b/src/lolwut.h @@ -1,30 +1,9 @@ /* - * Copyright (c) 2018-2019, Salvatore Sanfilippo + * Copyright (c) 2018-Present, Redis Ltd. * All rights reserved. * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Redis nor the names of its contributors may be used - * to endorse or promote products derived from this software without - * specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. + * Licensed under your choice of the Redis Source Available License 2.0 + * (RSALv2) or the Server Side Public License v1 (SSPLv1). 
*/ /* This structure represents our canvas. Drawing functions will take a pointer diff --git a/src/lolwut5.c b/src/lolwut5.c index 1240168d0d8..9f20292f47d 100644 --- a/src/lolwut5.c +++ b/src/lolwut5.c @@ -1,30 +1,9 @@ /* - * Copyright (c) 2018, Salvatore Sanfilippo + * Copyright (c) 2018-Present, Redis Ltd. * All rights reserved. * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Redis nor the names of its contributors may be used - * to endorse or promote products derived from this software without - * specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. + * Licensed under your choice of the Redis Source Available License 2.0 + * (RSALv2) or the Server Side Public License v1 (SSPLv1). 
* * ---------------------------------------------------------------------------- * diff --git a/src/lolwut6.c b/src/lolwut6.c index 1ba111c2d0b..1ccc643cb92 100644 --- a/src/lolwut6.c +++ b/src/lolwut6.c @@ -1,30 +1,9 @@ /* - * Copyright (c) 2019, Salvatore Sanfilippo + * Copyright (c) 2019-Present, Redis Ltd. * All rights reserved. * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Redis nor the names of its contributors may be used - * to endorse or promote products derived from this software without - * specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. + * Licensed under your choice of the Redis Source Available License 2.0 + * (RSALv2) or the Server Side Public License v1 (SSPLv1). 
* * ---------------------------------------------------------------------------- * diff --git a/src/memtest.c b/src/memtest.c index 1ca4b82cf9c..f5f49d1d3d2 100644 --- a/src/memtest.c +++ b/src/memtest.c @@ -1,36 +1,14 @@ /* - * Copyright (c) 2009-2012, Salvatore Sanfilippo + * Copyright (c) 2009-Present, Redis Ltd. * All rights reserved. * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Redis nor the names of its contributors may be used - * to endorse or promote products derived from this software without - * specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. + * Licensed under your choice of the Redis Source Available License 2.0 + * (RSALv2) or the Server Side Public License v1 (SSPLv1). 
*/ #include #include #include #include -#include #include #include #include @@ -39,6 +17,7 @@ #include #endif #include "config.h" +#include "redisassert.h" #if (ULONG_MAX == 4294967295UL) #define MEMTEST_32BIT diff --git a/src/mkreleasehdr.sh b/src/mkreleasehdr.sh index 117b9e86f2d..04bc45a1695 100755 --- a/src/mkreleasehdr.sh +++ b/src/mkreleasehdr.sh @@ -1,6 +1,6 @@ #!/bin/sh GIT_SHA1=`(git show-ref --head --hash=8 2> /dev/null || echo 00000000) | head -n1` -GIT_DIRTY=`git diff --no-ext-diff 2> /dev/null | wc -l` +GIT_DIRTY=`git diff --no-ext-diff -- ../src ../deps 2> /dev/null | wc -l` BUILD_ID=`uname -n`"-"`date +%s` if [ -n "$SOURCE_DATE_EPOCH" ]; then BUILD_ID=$(date -u -d "@$SOURCE_DATE_EPOCH" +%s 2>/dev/null || date -u -r "$SOURCE_DATE_EPOCH" +%s 2>/dev/null || date -u +%s) diff --git a/src/module.c b/src/module.c index 0addeecde8f..3920f1cffdf 100644 --- a/src/module.c +++ b/src/module.c @@ -1,30 +1,9 @@ /* - * Copyright (c) 2016, Salvatore Sanfilippo + * Copyright (c) 2016-Present, Redis Ltd. * All rights reserved. * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Redis nor the names of its contributors may be used - * to endorse or promote products derived from this software without - * specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. + * Licensed under your choice of the Redis Source Available License 2.0 + * (RSALv2) or the Server Side Public License v1 (SSPLv1). */ /* -------------------------------------------------------------------------- @@ -59,6 +38,7 @@ #include "script.h" #include "call_reply.h" #include "hdr_histogram.h" +#include "crc16_slottable.h" #include #include #include @@ -306,7 +286,6 @@ static size_t moduleTempClientMinCount = 0; /* Min client count in pool since * allow thread safe contexts to execute commands at a safe moment. */ static pthread_mutex_t moduleGIL = PTHREAD_MUTEX_INITIALIZER; - /* Function pointer type for keyspace event notification subscriptions from modules. */ typedef int (*RedisModuleNotificationFunc) (RedisModuleCtx *ctx, int type, const char *event, RedisModuleString *key); @@ -505,6 +484,10 @@ static struct redisCommandArg *moduleCopyCommandArgs(RedisModuleCommandArg *args static redisCommandArgType moduleConvertArgType(RedisModuleCommandArgType type, int *error); static int moduleConvertArgFlags(int flags); void moduleCreateContext(RedisModuleCtx *out_ctx, RedisModule *module, int ctx_flags); + +/* Common helper functions. 
*/ +int moduleVerifyResourceName(const char *name); + /* -------------------------------------------------------------------------- * ## Heap allocation raw functions * @@ -542,11 +525,23 @@ void *RM_Calloc(size_t nmemb, size_t size) { return zcalloc_usable(nmemb*size,NULL); } +/* Similar to RM_Calloc, but returns NULL in case of allocation failure, instead + * of panicking. */ +void *RM_TryCalloc(size_t nmemb, size_t size) { + return ztrycalloc_usable(nmemb*size,NULL); +} + /* Use like realloc() for memory obtained with RedisModule_Alloc(). */ void* RM_Realloc(void *ptr, size_t bytes) { return zrealloc_usable(ptr,bytes,NULL); } +/* Similar to RM_Realloc, but returns NULL in case of allocation failure, + * instead of panicking. */ +void *RM_TryRealloc(void *ptr, size_t bytes) { + return ztryrealloc_usable(ptr,bytes,NULL); +} + /* Use like free() for memory obtained by RedisModule_Alloc() and * RedisModule_Realloc(). However you should never try to free with * RedisModule_Free() memory allocated with malloc() inside your module. */ @@ -750,7 +745,7 @@ int moduleDelKeyIfEmpty(RedisModuleKey *key) { case OBJ_LIST: isempty = listTypeLength(o) == 0; break; case OBJ_SET: isempty = setTypeSize(o) == 0; break; case OBJ_ZSET: isempty = zsetLength(o) == 0; break; - case OBJ_HASH: isempty = hashTypeLength(o) == 0; break; + case OBJ_HASH: isempty = hashTypeLength(o, 0) == 0; break; case OBJ_STREAM: isempty = streamLength(o) == 0; break; default: isempty = 0; } @@ -1447,6 +1442,45 @@ int populateArgsStructure(struct redisCommandArg *args) { return count; } +/* RedisModule_AddACLCategory can be used to add new ACL command categories. Category names + * can only contain alphanumeric characters, underscores, or dashes. Categories can only be added + * during the RedisModule_OnLoad function. Once a category has been added, it can not be removed. + * Any module can register a command to any added categories using RedisModule_SetCommandACLCategories. 
+ * + * Returns: + * - REDISMODULE_OK on successfully adding the new ACL category. + * - REDISMODULE_ERR on failure. + * + * On error the errno is set to: + * - EINVAL if the name contains invalid characters. + * - EBUSY if the category name already exists. + * - ENOMEM if the number of categories reached the max limit of 64 categories. + */ +int RM_AddACLCategory(RedisModuleCtx *ctx, const char *name) { + if (!ctx->module->onload) { + errno = EINVAL; + return REDISMODULE_ERR; + } + + if (moduleVerifyResourceName(name) == REDISMODULE_ERR) { + errno = EINVAL; + return REDISMODULE_ERR; + } + + if (ACLGetCommandCategoryFlagByName(name)) { + errno = EBUSY; + return REDISMODULE_ERR; + } + + if (ACLAddCommandCategory(name, 0)) { + ctx->module->num_acl_categories_added++; + return REDISMODULE_OK; + } else { + errno = ENOMEM; + return REDISMODULE_ERR; + } +} + /* Helper for categoryFlagsFromString(). Attempts to find an acl flag representing the provided flag string * and adds that flag to acl_categories_flags if a match is found. * @@ -2252,6 +2286,7 @@ void RM_SetModuleAttribs(RedisModuleCtx *ctx, const char *name, int ver, int api module->loadmod = NULL; module->num_commands_with_acl_categories = 0; module->onload = 1; + module->num_acl_categories_added = 0; ctx->module = module; } @@ -2294,7 +2329,10 @@ ustime_t RM_CachedMicroseconds(void) { * Within the same command, you can call multiple times * RM_BlockedClientMeasureTimeStart() and RM_BlockedClientMeasureTimeEnd() * to accumulate independent time intervals to the background duration. - * This method always return REDISMODULE_OK. */ + * This method always return REDISMODULE_OK. + * + * This function is not thread safe, If used in module thread and blocked callback (possibly main thread) + * simultaneously, it's recommended to protect them with lock owned by caller instead of GIL. 
*/ int RM_BlockedClientMeasureTimeStart(RedisModuleBlockedClient *bc) { elapsedStart(&(bc->background_timer)); return REDISMODULE_OK; @@ -2304,7 +2342,10 @@ int RM_BlockedClientMeasureTimeStart(RedisModuleBlockedClient *bc) { * to calculate the elapsed execution time. * On success REDISMODULE_OK is returned. * This method only returns REDISMODULE_ERR if no start time was - * previously defined ( meaning RM_BlockedClientMeasureTimeStart was not called ). */ + * previously defined ( meaning RM_BlockedClientMeasureTimeStart was not called ). + * + * This function is not thread safe, If used in module thread and blocked callback (possibly main thread) + * simultaneously, it's recommended to protect them with lock owned by caller instead of GIL. */ int RM_BlockedClientMeasureTimeEnd(RedisModuleBlockedClient *bc) { // If the counter is 0 then we haven't called RM_BlockedClientMeasureTimeStart if (!bc->background_timer) @@ -2363,7 +2404,33 @@ void RM_Yield(RedisModuleCtx *ctx, int flags, const char *busy_reply) { server.busy_module_yield_flags |= BUSY_MODULE_YIELD_CLIENTS; /* Let redis process events */ - processEventsWhileBlocked(); + if (!pthread_equal(server.main_thread_id, pthread_self())) { + /* If we are not in the main thread, we defer event loop processing to the main thread + * after the main thread enters acquiring GIL state in order to protect the event + * loop (ae.c) and avoid potential race conditions. */ + + int acquiring; + atomicGet(server.module_gil_acquring, acquiring); + if (!acquiring) { + /* If the main thread has not yet entered the acquiring GIL state, + * we attempt to wake it up and exit without waiting for it to + * acquire the GIL. This avoids blocking the caller, allowing them to + * continue with unfinished tasks before the next yield. + * We assume the caller keeps the GIL locked. */ + if (write(server.module_pipe[1],"A",1) != 1) { + /* Ignore the error, this is best-effort. 
*/ + } + } else { + /* Release the GIL, yielding CPU to give the main thread an opportunity to start + * event processing, and then acquire the GIL again until the main thread releases it. */ + moduleReleaseGIL(); + usleep(0); + moduleAcquireGIL(); + } + } else { + /* If we are in the main thread, we can safely process events. */ + processEventsWhileBlocked(); + } server.busy_module_yield_reply = prev_busy_module_yield_reply; /* Possibly restore the previous flags in case of two nested contexts @@ -2647,7 +2714,10 @@ RedisModuleString *RM_CreateStringFromStreamID(RedisModuleCtx *ctx, const RedisM * pass ctx as NULL when releasing the string (but passing a context will not * create any issue). Strings created with a context should be freed also passing * the context, so if you want to free a string out of context later, make sure - * to create it using a NULL context. */ + * to create it using a NULL context. + * + * This API is not thread safe, access to these retained strings (if they originated + * from a client command arguments) must be done with GIL locked. */ void RM_FreeString(RedisModuleCtx *ctx, RedisModuleString *str) { decrRefCount(str); if (ctx != NULL) autoMemoryFreed(ctx,REDISMODULE_AM_STRING,str); @@ -2684,7 +2754,10 @@ void RM_FreeString(RedisModuleCtx *ctx, RedisModuleString *str) { * * Threaded modules that reference retained strings from other threads *must* * explicitly trim the allocation as soon as the string is retained. Not doing - * so may result with automatic trimming which is not thread safe. */ + * so may result with automatic trimming which is not thread safe. + * + * This API is not thread safe, access to these retained strings (if they originated + * from a client command arguments) must be done with GIL locked. 
*/ void RM_RetainString(RedisModuleCtx *ctx, RedisModuleString *str) { if (ctx == NULL || !autoMemoryFreed(ctx,REDISMODULE_AM_STRING,str)) { /* Increment the string reference counting only if we can't @@ -2726,7 +2799,10 @@ void RM_RetainString(RedisModuleCtx *ctx, RedisModuleString *str) { * * Threaded modules that reference held strings from other threads *must* * explicitly trim the allocation as soon as the string is held. Not doing - * so may result with automatic trimming which is not thread safe. */ + * so may result with automatic trimming which is not thread safe. + * + * This API is not thread safe, access to these retained strings (if they originated + * from a client command arguments) must be done with GIL locked. */ RedisModuleString* RM_HoldString(RedisModuleCtx *ctx, RedisModuleString *str) { if (str->refcount == OBJ_STATIC_REFCOUNT) { return RM_CreateStringFromString(ctx, str); @@ -3448,9 +3524,7 @@ int RM_ReplyWithLongDouble(RedisModuleCtx *ctx, long double ld) { * * The replicated commands are always wrapped into the MULTI/EXEC that * contains all the commands replicated in a given module command - * execution. However the commands replicated with RedisModule_Call() - * are the first items, the ones replicated with RedisModule_Replicate() - * will all follow before the EXEC. + * execution, in the order they were executed. * * Modules should try to use one interface or the other. * @@ -3472,9 +3546,8 @@ int RM_ReplyWithLongDouble(RedisModuleCtx *ctx, long double ld) { * the callback, and will propagate all the commands wrapped in a MULTI/EXEC * transaction. However when calling this function from a threaded safe context * that can live an undefined amount of time, and can be locked/unlocked in - * at will, the behavior is different: MULTI/EXEC wrapper is not emitted - * and the command specified is inserted in the AOF and replication stream - * immediately. 
+ * at will, it is important to note that this API is not thread-safe and + * must be executed while holding the GIL. * * #### Return value * @@ -3512,15 +3585,18 @@ int RM_Replicate(RedisModuleCtx *ctx, const char *cmdname, const char *fmt, ...) } /* This function will replicate the command exactly as it was invoked - * by the client. Note that this function will not wrap the command into - * a MULTI/EXEC stanza, so it should not be mixed with other replication - * commands. + * by the client. Note that the replicated commands are always wrapped + * into the MULTI/EXEC that contains all the commands replicated in a + * given module command execution, in the order they were executed. * * Basically this form of replication is useful when you want to propagate * the command to the slaves and AOF file exactly as it was called, since * the command can just be re-executed to deterministically re-create the * new state starting from the old one. * + * It is important to note that this API is not thread-safe and + * must be executed while holding the GIL. + * * The function always returns REDISMODULE_OK. */ int RM_ReplicateVerbatim(RedisModuleCtx *ctx) { alsoPropagate(ctx->client->db->id, @@ -4092,7 +4168,7 @@ size_t RM_ValueLength(RedisModuleKey *key) { case OBJ_LIST: return listTypeLength(key->value); case OBJ_SET: return setTypeSize(key->value); case OBJ_ZSET: return zsetLength(key->value); - case OBJ_HASH: return hashTypeLength(key->value); + case OBJ_HASH: return hashTypeLength(key->value, 0); /* OPEN: To subtract expired fields? */ case OBJ_STREAM: return streamLength(key->value); default: return 0; } @@ -4199,7 +4275,7 @@ void RM_ResetDataset(int restart_aof, int async) { /* Returns the number of keys in the current db. */ unsigned long long RM_DbSize(RedisModuleCtx *ctx) { - return dictSize(ctx->client->db->dict); + return dbSize(ctx->client->db); } /* Returns a name of a random key, or NULL if current db is empty. 
*/ @@ -4836,8 +4912,8 @@ int zsetInitScoreRange(RedisModuleKey *key, double min, double max, int minex, i } else if (key->value->encoding == OBJ_ENCODING_SKIPLIST) { zset *zs = key->value->ptr; zskiplist *zsl = zs->zsl; - key->u.zset.current = first ? zslFirstInRange(zsl,zrs) : - zslLastInRange(zsl,zrs); + key->u.zset.current = first ? zslNthInRange(zsl,zrs,0) : + zslNthInRange(zsl,zrs,-1); } else { serverPanic("Unsupported zset encoding"); } @@ -4900,8 +4976,8 @@ int zsetInitLexRange(RedisModuleKey *key, RedisModuleString *min, RedisModuleStr } else if (key->value->encoding == OBJ_ENCODING_SKIPLIST) { zset *zs = key->value->ptr; zskiplist *zsl = zs->zsl; - key->u.zset.current = first ? zslFirstInLexRange(zsl,zlrs) : - zslLastInLexRange(zsl,zlrs); + key->u.zset.current = first ? zslNthInLexRange(zsl,zlrs,0) : + zslNthInLexRange(zsl,zlrs,-1); } else { serverPanic("Unsupported zset encoding"); } @@ -5195,7 +5271,21 @@ int RM_HashSet(RedisModuleKey *key, int flags, ...) { /* Handle XX and NX */ if (flags & (REDISMODULE_HASH_XX|REDISMODULE_HASH_NX)) { - int exists = hashTypeExists(key->value, field->ptr); + int hfeFlags = HFE_LAZY_AVOID_HASH_DEL; /* Avoid invalidate the key */ + + /* + * The hash might contain expired fields. If we lazily delete expired + * field and the command was sent with XX flag, the operation could + * fail and leave the hash empty, which the caller might not expect. + * To prevent unexpected behavior, we avoid lazy deletion in this case + * yet let the operation fail. Note that moduleDelKeyIfEmpty() + * below won't delete the hash if it left with single expired key + * because hash counts blindly expired fields as well. + */ + if (flags & REDISMODULE_HASH_XX) + hfeFlags |= HFE_LAZY_AVOID_FIELD_DEL; + + int exists = hashTypeExists(key->db, key->value, field->ptr, hfeFlags, NULL); if (((flags & REDISMODULE_HASH_XX) && !exists) || ((flags & REDISMODULE_HASH_NX) && exists)) { @@ -5206,7 +5296,7 @@ int RM_HashSet(RedisModuleKey *key, int flags, ...) 
{ /* Handle deletion if value is REDISMODULE_HASH_DELETE. */ if (value == REDISMODULE_HASH_DELETE) { - count += hashTypeDelete(key->value, field->ptr); + count += hashTypeDelete(key->value, field->ptr, 1); if (flags & REDISMODULE_HASH_CFIELDS) decrRefCount(field); continue; } @@ -5219,8 +5309,8 @@ int RM_HashSet(RedisModuleKey *key, int flags, ...) { low_flags |= HASH_SET_TAKE_FIELD; robj *argv[2] = {field,value}; - hashTypeTryConversion(key->value,argv,0,1); - int updated = hashTypeSet(key->value, field->ptr, value->ptr, low_flags); + hashTypeTryConversion(key->db,key->value,argv,0,1); + int updated = hashTypeSet(key->db, key->value, field->ptr, value->ptr, low_flags); count += (flags & REDISMODULE_HASH_COUNT_ALL) ? 1 : updated; /* If CFIELDS is active, SDS string ownership is now of hashTypeSet(), @@ -5278,6 +5368,7 @@ int RM_HashSet(RedisModuleKey *key, int flags, ...) { * RedisModule_FreeString(), or by enabling automatic memory management. */ int RM_HashGet(RedisModuleKey *key, int flags, ...) { + int hfeFlags = HFE_LAZY_AVOID_FIELD_DEL | HFE_LAZY_AVOID_HASH_DEL; va_list ap; if (key->value && key->value->type != OBJ_HASH) return REDISMODULE_ERR; @@ -5298,14 +5389,17 @@ int RM_HashGet(RedisModuleKey *key, int flags, ...) { /* Query the hash for existence or value object. 
*/ if (flags & REDISMODULE_HASH_EXISTS) { existsptr = va_arg(ap,int*); - if (key->value) - *existsptr = hashTypeExists(key->value,field->ptr); - else + if (key->value) { + *existsptr = hashTypeExists(key->db, key->value, field->ptr, hfeFlags, NULL); + } else { *existsptr = 0; + } } else { valueptr = va_arg(ap,RedisModuleString**); if (key->value) { - *valueptr = hashTypeGetValueObject(key->value,field->ptr); + *valueptr = hashTypeGetValueObject(key->db, key->value, field->ptr, + hfeFlags, NULL); + if (*valueptr) { robj *decoded = getDecodedObject(*valueptr); decrRefCount(*valueptr); @@ -6422,7 +6516,7 @@ RedisModuleCallReply *RM_Call(RedisModuleCtx *ctx, const char *cmdname, const ch c->flags &= ~(CLIENT_READONLY|CLIENT_ASKING); c->flags |= ctx->client->flags & (CLIENT_READONLY|CLIENT_ASKING); if (getNodeByQuery(c,c->cmd,c->argv,c->argc,NULL,&error_code) != - server.cluster->myself) + getMyClusterNode()) { sds msg = NULL; if (error_code == CLUSTER_REDIR_DOWN_RO_STATE) { @@ -7602,6 +7696,13 @@ void RM_LatencyAddSample(const char *event, mstime_t latency) { * https://redis.io/topics/modules-blocking-ops. * -------------------------------------------------------------------------- */ +/* Returns 1 if the client already in the moduleUnblocked list, 0 otherwise. 
*/ +int isModuleClientUnblocked(client *c) { + RedisModuleBlockedClient *bc = c->bstate.module_blocked_handle; + + return bc->unblocked == 1; +} + /* This is called from blocked.c in order to unblock a client: may be called * for multiple reasons while the client is in the middle of being blocked * because the client is terminated, but is also called for cleanup when a @@ -7706,15 +7807,15 @@ RedisModuleBlockedClient *moduleBlockClient(RedisModuleCtx *ctx, RedisModuleCmdF bc->background_timer = 0; bc->background_duration = 0; - c->bstate.timeout = 0; + mstime_t timeout = 0; if (timeout_ms) { mstime_t now = mstime(); - if (timeout_ms > LLONG_MAX - now) { + if (timeout_ms > LLONG_MAX - now) { c->bstate.module_blocked_handle = NULL; addReplyError(c, "timeout is out of range"); /* 'timeout_ms+now' would overflow */ return bc; } - c->bstate.timeout = timeout_ms + now; + timeout = timeout_ms + now; } if (islua || ismulti) { @@ -7730,8 +7831,9 @@ RedisModuleBlockedClient *moduleBlockClient(RedisModuleCtx *ctx, RedisModuleCmdF addReplyError(c, "Clients undergoing module based authentication can only be blocked on auth"); } else { if (keys) { - blockForKeys(c,BLOCKED_MODULE,keys,numkeys,c->bstate.timeout,flags&REDISMODULE_BLOCK_UNBLOCK_DELETED); + blockForKeys(c,BLOCKED_MODULE,keys,numkeys,timeout,flags&REDISMODULE_BLOCK_UNBLOCK_DELETED); } else { + c->bstate.timeout = timeout; blockClient(c,BLOCKED_MODULE); } } @@ -8158,7 +8260,7 @@ int RM_UnblockClient(RedisModuleBlockedClient *bc, void *privdata) { * argument, but better to be safe than sorry. 
*/ if (bc->timeout_callback == NULL) return REDISMODULE_ERR; if (bc->unblocked) return REDISMODULE_OK; - if (bc->client) moduleBlockedClientTimedOut(bc->client); + if (bc->client) moduleBlockedClientTimedOut(bc->client, 1); } moduleUnblockClientByHandle(bc,privdata); return REDISMODULE_OK; @@ -8257,8 +8359,10 @@ void moduleHandleBlockedClients(void) { * This needs to be out of the reply callback above given that a * module might not define any callback and still do blocking ops. */ - if (c && !clientHasModuleAuthInProgress(c) && !bc->blocked_on_keys) { - updateStatsOnUnblock(c, bc->background_duration, reply_us, server.stat_total_error_replies != prev_error_replies); + if (c && !clientHasModuleAuthInProgress(c)) { + int had_errors = c->deferred_reply_errors ? !!listLength(c->deferred_reply_errors) : + (server.stat_total_error_replies != prev_error_replies); + updateStatsOnUnblock(c, bc->background_duration, reply_us, had_errors); } if (c != NULL) { @@ -8276,7 +8380,7 @@ void moduleHandleBlockedClients(void) { * if there are pending replies here. This is needed since * during a non blocking command the client may receive output. */ if (!clientHasModuleAuthInProgress(c) && clientHasPendingReplies(c) && - !(c->flags & CLIENT_PENDING_WRITE)) + !(c->flags & CLIENT_PENDING_WRITE) && c->conn) { c->flags |= CLIENT_PENDING_WRITE; listLinkNodeHead(server.clients_pending_write, &c->clients_pending_write_node); @@ -8311,8 +8415,15 @@ int moduleBlockedClientMayTimeout(client *c) { /* Called when our client timed out. After this function unblockClient() * is called, and it will invalidate the blocked client. So this function * does not need to do any cleanup. Eventually the module will call the - * API to unblock the client and the memory will be released. */ -void moduleBlockedClientTimedOut(client *c) { + * API to unblock the client and the memory will be released. 
+ * + * If this function is called from a module, we handle the timeout callback + * and the update of the unblock status in a thread-safe manner to avoid race + * conditions with the main thread. + * If this function is called from the main thread, we must handle the unblocking + * of the client synchronously. This ensures that we can reply to the client before + * resetClient() is called. */ +void moduleBlockedClientTimedOut(client *c, int from_module) { RedisModuleBlockedClient *bc = c->bstate.module_blocked_handle; /* Protect against re-processing: don't serve clients that are already @@ -8321,14 +8432,27 @@ void moduleBlockedClientTimedOut(client *c) { if (bc->unblocked) return; RedisModuleCtx ctx; - moduleCreateContext(&ctx, bc->module, REDISMODULE_CTX_BLOCKED_TIMEOUT); + int flags = REDISMODULE_CTX_BLOCKED_TIMEOUT; + if (from_module) flags |= REDISMODULE_CTX_THREAD_SAFE; + moduleCreateContext(&ctx, bc->module, flags); ctx.client = bc->client; ctx.blocked_client = bc; ctx.blocked_privdata = bc->privdata; - long long prev_error_replies = server.stat_total_error_replies; - bc->timeout_callback(&ctx,(void**)c->argv,c->argc); + + long long prev_error_replies; + if (!from_module) + prev_error_replies = server.stat_total_error_replies; + + if (bc->timeout_callback) { + /* In theory, the user should always pass the timeout handler as an + * argument, but better to be safe than sorry. 
*/ + bc->timeout_callback(&ctx,(void**)c->argv,c->argc); + } + moduleFreeContext(&ctx); - updateStatsOnUnblock(c, bc->background_duration, 0, server.stat_total_error_replies != prev_error_replies); + + if (!from_module) + updateStatsOnUnblock(c, bc->background_duration, 0, server.stat_total_error_replies != prev_error_replies); /* For timeout events, we do not want to call the disconnect callback, * because the blocked client will be automatically disconnected in @@ -8719,11 +8843,12 @@ void moduleNotifyKeyspaceEvent(int type, const char *event, robj *key, int dbid) /* mark the handler as active to avoid reentrant loops. * If the subscriber performs an action triggering itself, * it will not be notified about it. */ + int prev_active = sub->active; sub->active = 1; server.lazy_expire_disabled++; sub->notify_callback(&ctx, type, event, key); server.lazy_expire_disabled--; - sub->active = 0; + sub->active = prev_active; moduleFreeContext(&ctx); } } @@ -8872,23 +8997,7 @@ char **RM_GetClusterNodesList(RedisModuleCtx *ctx, size_t *numnodes) { UNUSED(ctx); if (!server.cluster_enabled) return NULL; - size_t count = dictSize(server.cluster->nodes); - char **ids = zmalloc((count+1)*REDISMODULE_NODE_ID_LEN); - dictIterator *di = dictGetIterator(server.cluster->nodes); - dictEntry *de; - int j = 0; - while((de = dictNext(di)) != NULL) { - clusterNode *node = dictGetVal(de); - if (node->flags & (CLUSTER_NODE_NOADDR|CLUSTER_NODE_HANDSHAKE)) continue; - ids[j] = zmalloc(REDISMODULE_NODE_ID_LEN); - memcpy(ids[j],node->name,REDISMODULE_NODE_ID_LEN); - j++; - } - *numnodes = j; - ids[j] = NULL; /* Null term so that FreeClusterNodesList does not need - * to also get the count argument. */ - dictReleaseIterator(di); - return ids; + return getClusterNodesList(numnodes); } /* Free the node list obtained with RedisModule_GetClusterNodesList. */ @@ -8902,7 +9011,7 @@ void RM_FreeClusterNodesList(char **ids) { * is disabled. 
*/ const char *RM_GetMyClusterID(void) { if (!server.cluster_enabled) return NULL; - return server.cluster->myself->name; + return getMyClusterId(); } /* Return the number of nodes in the cluster, regardless of their state @@ -8911,7 +9020,7 @@ const char *RM_GetMyClusterID(void) { * cluster mode, zero is returned. */ size_t RM_GetClusterSize(void) { if (!server.cluster_enabled) return 0; - return dictSize(server.cluster->nodes); + return getClusterSize(); } /* Populate the specified info for the node having as ID the specified 'id', @@ -8938,20 +9047,19 @@ int RM_GetClusterNodeInfo(RedisModuleCtx *ctx, const char *id, char *ip, char *m UNUSED(ctx); clusterNode *node = clusterLookupNode(id, strlen(id)); - if (node == NULL || - node->flags & (CLUSTER_NODE_NOADDR|CLUSTER_NODE_HANDSHAKE)) + if (node == NULL || clusterNodePending(node)) { return REDISMODULE_ERR; } - if (ip) redis_strlcpy(ip,node->ip,NET_IP_STR_LEN); + if (ip) redis_strlcpy(ip, clusterNodeIp(node),NET_IP_STR_LEN); if (master_id) { /* If the information is not available, the function will set the * field to zero bytes, so that when the field can't be populated the * function kinda remains predictable. */ - if (node->flags & CLUSTER_NODE_SLAVE && node->slaveof) - memcpy(master_id,node->slaveof->name,REDISMODULE_NODE_ID_LEN); + if (clusterNodeIsSlave(node) && clusterNodeGetSlaveof(node)) + memcpy(master_id, clusterNodeGetName(clusterNodeGetSlaveof(node)) ,REDISMODULE_NODE_ID_LEN); else memset(master_id,0,REDISMODULE_NODE_ID_LEN); } @@ -8961,12 +9069,12 @@ int RM_GetClusterNodeInfo(RedisModuleCtx *ctx, const char *id, char *ip, char *m * we can provide binary compatibility. 
*/ if (flags) { *flags = 0; - if (node->flags & CLUSTER_NODE_MYSELF) *flags |= REDISMODULE_NODE_MYSELF; - if (node->flags & CLUSTER_NODE_MASTER) *flags |= REDISMODULE_NODE_MASTER; - if (node->flags & CLUSTER_NODE_SLAVE) *flags |= REDISMODULE_NODE_SLAVE; - if (node->flags & CLUSTER_NODE_PFAIL) *flags |= REDISMODULE_NODE_PFAIL; - if (node->flags & CLUSTER_NODE_FAIL) *flags |= REDISMODULE_NODE_FAIL; - if (node->flags & CLUSTER_NODE_NOFAILOVER) *flags |= REDISMODULE_NODE_NOFAILOVER; + if (clusterNodeIsMyself(node)) *flags |= REDISMODULE_NODE_MYSELF; + if (clusterNodeIsMaster(node)) *flags |= REDISMODULE_NODE_MASTER; + if (clusterNodeIsSlave(node)) *flags |= REDISMODULE_NODE_SLAVE; + if (clusterNodeTimedOut(node)) *flags |= REDISMODULE_NODE_PFAIL; + if (clusterNodeIsFailing(node)) *flags |= REDISMODULE_NODE_FAIL; + if (clusterNodeIsNoFailover(node)) *flags |= REDISMODULE_NODE_NOFAILOVER; } return REDISMODULE_OK; } @@ -8997,6 +9105,19 @@ void RM_SetClusterFlags(RedisModuleCtx *ctx, uint64_t flags) { server.cluster_module_flags |= CLUSTER_MODULE_FLAG_NO_REDIRECTION; } +/* Returns the cluster slot of a key, similar to the `CLUSTER KEYSLOT` command. + * This function works even if cluster mode is not enabled. */ +unsigned int RM_ClusterKeySlot(RedisModuleString *key) { + return keyHashSlot(key->ptr, sdslen(key->ptr)); +} + +/* Returns a short string that can be used as a key or as a hash tag in a key, + * such that the key maps to the given cluster slot. Returns NULL if slot is not + * a valid slot. */ +const char *RM_ClusterCanonicalKeyNameInSlot(unsigned int slot) { + return (slot < CLUSTER_SLOTS) ? 
crc16_slot_table[slot] : NULL; +} + /* -------------------------------------------------------------------------- * ## Modules Timers API * @@ -9102,7 +9223,7 @@ RedisModuleTimerID RM_CreateTimer(RedisModuleCtx *ctx, mstime_t period, RedisMod while(1) { key = htonu64(expiretime); - if (raxFind(Timers, (unsigned char*)&key,sizeof(key)) == raxNotFound) { + if (!raxFind(Timers, (unsigned char*)&key,sizeof(key),NULL)) { raxInsert(Timers,(unsigned char*)&key,sizeof(key),timer,NULL); break; } else { @@ -9141,8 +9262,11 @@ RedisModuleTimerID RM_CreateTimer(RedisModuleCtx *ctx, mstime_t period, RedisMod * If not NULL, the data pointer is set to the value of the data argument when * the timer was created. */ int RM_StopTimer(RedisModuleCtx *ctx, RedisModuleTimerID id, void **data) { - RedisModuleTimer *timer = raxFind(Timers,(unsigned char*)&id,sizeof(id)); - if (timer == raxNotFound || timer->module != ctx->module) + void *result; + if (!raxFind(Timers,(unsigned char*)&id,sizeof(id),&result)) + return REDISMODULE_ERR; + RedisModuleTimer *timer = result; + if (timer->module != ctx->module) return REDISMODULE_ERR; if (data) *data = timer->data; raxRemove(Timers,(unsigned char*)&id,sizeof(id),NULL); @@ -9157,8 +9281,11 @@ int RM_StopTimer(RedisModuleCtx *ctx, RedisModuleTimerID id, void **data) { * REDISMODULE_OK is returned. The arguments remaining or data can be NULL if * the caller does not need certain information. 
*/ int RM_GetTimerInfo(RedisModuleCtx *ctx, RedisModuleTimerID id, uint64_t *remaining, void **data) { - RedisModuleTimer *timer = raxFind(Timers,(unsigned char*)&id,sizeof(id)); - if (timer == raxNotFound || timer->module != ctx->module) + void *result; + if (!raxFind(Timers,(unsigned char*)&id,sizeof(id),&result)) + return REDISMODULE_ERR; + RedisModuleTimer *timer = result; + if (timer->module != ctx->module) return REDISMODULE_ERR; if (remaining) { int64_t rem = ntohu64(id)-ustime(); @@ -9428,15 +9555,7 @@ void revokeClientAuthentication(client *c) { * is eventually freed we don't rely on the module to still exist. */ moduleNotifyUserChanged(c); - c->user = DefaultUser; - c->authenticated = 0; - /* We will write replies to this client later, so we can't close it - * directly even if async. */ - if (c == server.current_client) { - c->flags |= CLIENT_CLOSE_AFTER_COMMAND; - } else { - freeClientAsync(c); - } + deauthenticateAndCloseClient(c); } /* Cleanup all clients that have been authenticated with this module. This @@ -9926,9 +10045,10 @@ int RM_DictReplace(RedisModuleDict *d, RedisModuleString *key, void *ptr) { * be set by reference to 1 if the key does not exist, or to 0 if the key * exists. */ void *RM_DictGetC(RedisModuleDict *d, void *key, size_t keylen, int *nokey) { - void *res = raxFind(d->rax,key,keylen); - if (nokey) *nokey = (res == raxNotFound); - return (res == raxNotFound) ? NULL : res; + void *res = NULL; + int found = raxFind(d->rax,key,keylen,&res); + if (nokey) *nokey = !found; + return res; } /* Like RedisModule_DictGetC() but takes the key as a RedisModuleString. */ @@ -10350,8 +10470,10 @@ void RM_FreeServerInfo(RedisModuleCtx *ctx, RedisModuleServerInfoData *data) { * mechanism to release the returned string. Return value will be NULL if the * field was not found. 
*/ RedisModuleString *RM_ServerInfoGetField(RedisModuleCtx *ctx, RedisModuleServerInfoData *data, const char* field) { - sds val = raxFind(data->rax, (unsigned char *)field, strlen(field)); - if (val == raxNotFound) return NULL; + void *result; + if (!raxFind(data->rax, (unsigned char *)field, strlen(field), &result)) + return NULL; + sds val = result; RedisModuleString *o = createStringObject(val,sdslen(val)); if (ctx != NULL) autoMemoryAdd(ctx,REDISMODULE_AM_STRING,o); return o; @@ -10359,9 +10481,9 @@ RedisModuleString *RM_ServerInfoGetField(RedisModuleCtx *ctx, RedisModuleServerI /* Similar to RM_ServerInfoGetField, but returns a char* which should not be freed but the caller. */ const char *RM_ServerInfoGetFieldC(RedisModuleServerInfoData *data, const char* field) { - sds val = raxFind(data->rax, (unsigned char *)field, strlen(field)); - if (val == raxNotFound) return NULL; - return val; + void *result = NULL; + raxFind(data->rax, (unsigned char *)field, strlen(field), &result); + return result; } /* Get the value of a field from data collected with RM_GetServerInfo(). If the @@ -10369,11 +10491,12 @@ const char *RM_ServerInfoGetFieldC(RedisModuleServerInfoData *data, const char* * 0, and the optional out_err argument will be set to REDISMODULE_ERR. */ long long RM_ServerInfoGetFieldSigned(RedisModuleServerInfoData *data, const char* field, int *out_err) { long long ll; - sds val = raxFind(data->rax, (unsigned char *)field, strlen(field)); - if (val == raxNotFound) { + void *result; + if (!raxFind(data->rax, (unsigned char *)field, strlen(field), &result)) { if (out_err) *out_err = REDISMODULE_ERR; return 0; } + sds val = result; if (!string2ll(val,sdslen(val),&ll)) { if (out_err) *out_err = REDISMODULE_ERR; return 0; @@ -10387,11 +10510,12 @@ long long RM_ServerInfoGetFieldSigned(RedisModuleServerInfoData *data, const cha * 0, and the optional out_err argument will be set to REDISMODULE_ERR. 
*/ unsigned long long RM_ServerInfoGetFieldUnsigned(RedisModuleServerInfoData *data, const char* field, int *out_err) { unsigned long long ll; - sds val = raxFind(data->rax, (unsigned char *)field, strlen(field)); - if (val == raxNotFound) { + void *result; + if (!raxFind(data->rax, (unsigned char *)field, strlen(field), &result)) { if (out_err) *out_err = REDISMODULE_ERR; return 0; } + sds val = result; if (!string2ull(val,&ll)) { if (out_err) *out_err = REDISMODULE_ERR; return 0; @@ -10405,11 +10529,12 @@ unsigned long long RM_ServerInfoGetFieldUnsigned(RedisModuleServerInfoData *data * optional out_err argument will be set to REDISMODULE_ERR. */ double RM_ServerInfoGetFieldDouble(RedisModuleServerInfoData *data, const char* field, int *out_err) { double dbl; - sds val = raxFind(data->rax, (unsigned char *)field, strlen(field)); - if (val == raxNotFound) { + void *result; + if (!raxFind(data->rax, (unsigned char *)field, strlen(field), &result)) { if (out_err) *out_err = REDISMODULE_ERR; return 0; } + sds val = result; if (!string2d(val,sdslen(val),&dbl)) { if (out_err) *out_err = REDISMODULE_ERR; return 0; @@ -10834,7 +10959,7 @@ typedef struct { } ScanCBData; typedef struct RedisModuleScanCursor{ - unsigned long cursor; + unsigned long long cursor; int done; }RedisModuleScanCursor; @@ -10936,7 +11061,7 @@ int RM_Scan(RedisModuleCtx *ctx, RedisModuleScanCursor *cursor, RedisModuleScanC } int ret = 1; ScanCBData data = { ctx, privdata, fn }; - cursor->cursor = dictScan(ctx->client->db->dict, cursor->cursor, moduleScanCallback, &data); + cursor->cursor = dbScan(ctx->client->db, cursor->cursor, moduleScanCallback, &data); if (cursor->cursor == 0) { cursor->done = 1; ret = 0; @@ -10956,18 +11081,27 @@ static void moduleScanKeyCallback(void *privdata, const dictEntry *de) { ScanKeyCBData *data = privdata; sds key = dictGetKey(de); robj *o = data->key->value; - robj *field = createStringObject(key, sdslen(key)); + robj *field = NULL; robj *value = NULL; if (o->type == 
OBJ_SET) { value = NULL; } else if (o->type == OBJ_HASH) { sds val = dictGetVal(de); + + /* If field is expired, then ignore */ + if (hfieldIsExpired(key)) + return; + + field = createStringObject(key, hfieldlen(key)); value = createStringObject(val, sdslen(val)); } else if (o->type == OBJ_ZSET) { double *val = (double*)dictGetVal(de); value = createStringObjectFromLongDouble(*val, 0); } + /* if type is OBJ_HASH then key is of type hfield. Otherwise sds. */ + if (!field) field = createStringObject(key, sdslen(key)); + data->fn(data->key, field, value, data->user_data); decrRefCount(field); if (value) decrRefCount(value); @@ -11066,22 +11200,44 @@ int RM_ScanKey(RedisModuleKey *key, RedisModuleScanCursor *cursor, RedisModuleSc cursor->done = 1; ret = 0; } else if (o->type == OBJ_ZSET || o->type == OBJ_HASH) { - unsigned char *p = lpSeek(o->ptr,0); - unsigned char *vstr; - unsigned int vlen; - long long vll; + unsigned char *lp, *p; + /* is hash with expiry on fields, then lp tuples are [field][value][expire] */ + int hfe = o->type == OBJ_HASH && o->encoding == OBJ_ENCODING_LISTPACK_EX; + + if (o->type == OBJ_HASH) + lp = hashTypeListpackGetLp(o); + else + lp = o->ptr; + + p = lpSeek(lp,0); while(p) { - vstr = lpGetValue(p,&vlen,&vll); - robj *field = (vstr != NULL) ? - createStringObject((char*)vstr,vlen) : - createStringObjectFromLongLongWithSds(vll); - p = lpNext(o->ptr,p); - vstr = lpGetValue(p,&vlen,&vll); - robj *value = (vstr != NULL) ? 
- createStringObject((char*)vstr,vlen) : - createStringObjectFromLongLongWithSds(vll); + long long vllField, vllValue, vllExpire; + unsigned int lenField, lenValue; + unsigned char *pField, *pValue; + + pField = lpGetValue(p,&lenField,&vllField); + p = lpNext(lp,p); + pValue = lpGetValue(p,&lenValue,&vllValue); + p = lpNext(lp,p); + + if (hfe) { + serverAssert(lpGetIntegerValue(p, &vllExpire)); + p = lpNext(lp, p); + + /* Skip expired fields */ + if (hashTypeIsExpired(o, vllExpire)) + continue; + } + + robj *value = (pValue != NULL) ? + createStringObject((char*)pValue,lenValue) : + createStringObjectFromLongLongWithSds(vllValue); + + robj *field = (pField != NULL) ? + createStringObject((char*)pField,lenField) : + createStringObjectFromLongLongWithSds(vllField); fn(key, field, value, privdata); - p = lpNext(o->ptr,p); + decrRefCount(field); decrRefCount(value); } @@ -11093,7 +11249,6 @@ int RM_ScanKey(RedisModuleKey *key, RedisModuleScanCursor *cursor, RedisModuleSc return ret; } - /* -------------------------------------------------------------------------- * ## Module fork API * -------------------------------------------------------------------------- */ @@ -11848,6 +12003,7 @@ void moduleInitModulesSystem(void) { moduleUnblockedClients = listCreate(); server.loadmodule_queue = listCreate(); server.module_configs_queue = dictCreate(&sdsKeyValueHashDictType); + server.module_gil_acquring = 0; modules = dictCreate(&modulesDictType); moduleAuthCallbacks = listCreate(); @@ -11937,6 +12093,13 @@ void moduleRemoveConfigs(RedisModule *module) { } } +/* Remove ACL categories added by the module when it fails to load. */ +void moduleRemoveCateogires(RedisModule *module) { + if (module->num_acl_categories_added) { + ACLCleanupCategoriesOnFailure(module->num_acl_categories_added); + } +} + /* Load all the modules in the server.loadmodule_queue list, which is * populated by `loadmodule` directives in the configuration file. 
* We can't load modules directly when processing the configuration file @@ -12115,6 +12278,19 @@ int parseLoadexArguments(RedisModuleString ***module_argv, int *module_argc) { return REDISMODULE_OK; } +/* Unregister module-related things, called when moduleLoad fails or moduleUnload. */ +void moduleUnregisterCleanup(RedisModule *module) { + moduleFreeAuthenticatedClients(module); + moduleUnregisterCommands(module); + moduleUnsubscribeNotifications(module); + moduleUnregisterSharedAPI(module); + moduleUnregisterUsedAPI(module); + moduleUnregisterFilters(module); + moduleUnsubscribeAllServerEvents(module); + moduleRemoveConfigs(module); + moduleUnregisterAuthCBs(module); +} + /* Load a module and initialize it. On success C_OK is returned, otherwise * C_ERR is returned. */ int moduleLoad(const char *path, void **module_argv, int module_argc, int is_loadex) { @@ -12149,11 +12325,8 @@ int moduleLoad(const char *path, void **module_argv, int module_argc, int is_loa serverLog(LL_WARNING, "Module %s initialization failed. Module not loaded",path); if (ctx.module) { - moduleUnregisterCommands(ctx.module); - moduleUnregisterSharedAPI(ctx.module); - moduleUnregisterUsedAPI(ctx.module); - moduleRemoveConfigs(ctx.module); - moduleUnregisterAuthCBs(ctx.module); + moduleUnregisterCleanup(ctx.module); + moduleRemoveCateogires(ctx.module); moduleFreeModuleStructure(ctx.module); } moduleFreeContext(&ctx); @@ -12194,8 +12367,6 @@ int moduleLoad(const char *path, void **module_argv, int module_argc, int is_loa } if (post_load_err) { - /* Unregister module auth callbacks (if any exist) that this Module registered onload. 
*/ - moduleUnregisterAuthCBs(ctx.module); moduleUnload(ctx.module->name, NULL); moduleFreeContext(&ctx); return C_ERR; @@ -12253,17 +12424,7 @@ int moduleUnload(sds name, const char **errmsg) { } } - moduleFreeAuthenticatedClients(module); - moduleUnregisterCommands(module); - moduleUnregisterSharedAPI(module); - moduleUnregisterUsedAPI(module); - moduleUnregisterFilters(module); - moduleUnregisterAuthCBs(module); - moduleRemoveConfigs(module); - - /* Remove any notification subscribers this module might have */ - moduleUnsubscribeNotifications(module); - moduleUnsubscribeAllServerEvents(module); + moduleUnregisterCleanup(module); /* Unload the dynamic library. */ if (dlclose(module->handle) == -1) { @@ -12395,7 +12556,7 @@ sds genModulesInfoString(sds info) { * -------------------------------------------------------------------------- */ /* Check if the configuration name is already registered */ -int isModuleConfigNameRegistered(RedisModule *module, sds name) { +int isModuleConfigNameRegistered(RedisModule *module, const char *name) { listNode *match = listSearchKey(module->module_configs, (void *) name); return match != NULL; } @@ -12424,12 +12585,14 @@ int moduleVerifyConfigFlags(unsigned int flags, configType type) { return REDISMODULE_OK; } -int moduleVerifyConfigName(sds name) { - if (sdslen(name) == 0) { - serverLogRaw(LL_WARNING, "Module config names cannot be an empty string."); +/* Verify a module resource or name has only alphanumeric characters, underscores + * or dashes. 
*/ +int moduleVerifyResourceName(const char *name) { + if (name[0] == '\0') { return REDISMODULE_ERR; } - for (size_t i = 0 ; i < sdslen(name) ; ++i) { + + for (size_t i = 0; name[i] != '\0'; i++) { char curr_char = name[i]; if ((curr_char >= 'a' && curr_char <= 'z') || (curr_char >= 'A' && curr_char <= 'Z') || @@ -12438,7 +12601,7 @@ int moduleVerifyConfigName(sds name) { { continue; } - serverLog(LL_WARNING, "Invalid character %c in Module Config name %s.", curr_char, name); + serverLog(LL_WARNING, "Invalid character %c in Module resource name %s.", curr_char, name); return REDISMODULE_ERR; } return REDISMODULE_OK; @@ -12583,21 +12746,21 @@ int moduleConfigApplyConfig(list *module_configs, const char **err, const char * * -------------------------------------------------------------------------- */ /* Create a module config object. */ -ModuleConfig *createModuleConfig(sds name, RedisModuleConfigApplyFunc apply_fn, void *privdata, RedisModule *module) { +ModuleConfig *createModuleConfig(const char *name, RedisModuleConfigApplyFunc apply_fn, void *privdata, RedisModule *module) { ModuleConfig *new_config = zmalloc(sizeof(ModuleConfig)); - new_config->name = sdsdup(name); + new_config->name = sdsnew(name); new_config->apply_fn = apply_fn; new_config->privdata = privdata; new_config->module = module; return new_config; } -int moduleConfigValidityCheck(RedisModule *module, sds name, unsigned int flags, configType type) { +int moduleConfigValidityCheck(RedisModule *module, const char *name, unsigned int flags, configType type) { if (!module->onload) { errno = EBUSY; return REDISMODULE_ERR; } - if (moduleVerifyConfigFlags(flags, type) || moduleVerifyConfigName(name)) { + if (moduleVerifyConfigFlags(flags, type) || moduleVerifyResourceName(name)) { errno = EINVAL; return REDISMODULE_ERR; } @@ -12708,13 +12871,10 @@ unsigned int maskModuleEnumConfigFlags(unsigned int flags) { * * EALREADY: The provided configuration name is already used. 
*/ int RM_RegisterStringConfig(RedisModuleCtx *ctx, const char *name, const char *default_val, unsigned int flags, RedisModuleConfigGetStringFunc getfn, RedisModuleConfigSetStringFunc setfn, RedisModuleConfigApplyFunc applyfn, void *privdata) { RedisModule *module = ctx->module; - sds config_name = sdsnew(name); - if (moduleConfigValidityCheck(module, config_name, flags, NUMERIC_CONFIG)) { - sdsfree(config_name); + if (moduleConfigValidityCheck(module, name, flags, NUMERIC_CONFIG)) { return REDISMODULE_ERR; } - ModuleConfig *new_config = createModuleConfig(config_name, applyfn, privdata, module); - sdsfree(config_name); + ModuleConfig *new_config = createModuleConfig(name, applyfn, privdata, module); new_config->get_fn.get_string = getfn; new_config->set_fn.set_string = setfn; listAddNodeTail(module->module_configs, new_config); @@ -12728,13 +12888,10 @@ int RM_RegisterStringConfig(RedisModuleCtx *ctx, const char *name, const char *d * RedisModule_RegisterStringConfig for detailed information about configs. 
*/ int RM_RegisterBoolConfig(RedisModuleCtx *ctx, const char *name, int default_val, unsigned int flags, RedisModuleConfigGetBoolFunc getfn, RedisModuleConfigSetBoolFunc setfn, RedisModuleConfigApplyFunc applyfn, void *privdata) { RedisModule *module = ctx->module; - sds config_name = sdsnew(name); - if (moduleConfigValidityCheck(module, config_name, flags, BOOL_CONFIG)) { - sdsfree(config_name); + if (moduleConfigValidityCheck(module, name, flags, BOOL_CONFIG)) { return REDISMODULE_ERR; } - ModuleConfig *new_config = createModuleConfig(config_name, applyfn, privdata, module); - sdsfree(config_name); + ModuleConfig *new_config = createModuleConfig(name, applyfn, privdata, module); new_config->get_fn.get_bool = getfn; new_config->set_fn.set_bool = setfn; listAddNodeTail(module->module_configs, new_config); @@ -12774,13 +12931,10 @@ int RM_RegisterBoolConfig(RedisModuleCtx *ctx, const char *name, int default_val * See RedisModule_RegisterStringConfig for detailed general information about configs. 
*/ int RM_RegisterEnumConfig(RedisModuleCtx *ctx, const char *name, int default_val, unsigned int flags, const char **enum_values, const int *int_values, int num_enum_vals, RedisModuleConfigGetEnumFunc getfn, RedisModuleConfigSetEnumFunc setfn, RedisModuleConfigApplyFunc applyfn, void *privdata) { RedisModule *module = ctx->module; - sds config_name = sdsnew(name); - if (moduleConfigValidityCheck(module, config_name, flags, ENUM_CONFIG)) { - sdsfree(config_name); + if (moduleConfigValidityCheck(module, name, flags, ENUM_CONFIG)) { return REDISMODULE_ERR; } - ModuleConfig *new_config = createModuleConfig(config_name, applyfn, privdata, module); - sdsfree(config_name); + ModuleConfig *new_config = createModuleConfig(name, applyfn, privdata, module); new_config->get_fn.get_enum = getfn; new_config->set_fn.set_enum = setfn; configEnum *enum_vals = zmalloc((num_enum_vals + 1) * sizeof(configEnum)); @@ -12802,13 +12956,10 @@ int RM_RegisterEnumConfig(RedisModuleCtx *ctx, const char *name, int default_val * RedisModule_RegisterStringConfig for detailed information about configs. 
*/ int RM_RegisterNumericConfig(RedisModuleCtx *ctx, const char *name, long long default_val, unsigned int flags, long long min, long long max, RedisModuleConfigGetNumericFunc getfn, RedisModuleConfigSetNumericFunc setfn, RedisModuleConfigApplyFunc applyfn, void *privdata) { RedisModule *module = ctx->module; - sds config_name = sdsnew(name); - if (moduleConfigValidityCheck(module, config_name, flags, NUMERIC_CONFIG)) { - sdsfree(config_name); + if (moduleConfigValidityCheck(module, name, flags, NUMERIC_CONFIG)) { return REDISMODULE_ERR; } - ModuleConfig *new_config = createModuleConfig(config_name, applyfn, privdata, module); - sdsfree(config_name); + ModuleConfig *new_config = createModuleConfig(name, applyfn, privdata, module); new_config->get_fn.get_numeric = getfn; new_config->set_fn.set_numeric = setfn; listAddNodeTail(module->module_configs, new_config); @@ -13497,7 +13648,9 @@ void moduleRegisterCoreAPI(void) { REGISTER_API(Alloc); REGISTER_API(TryAlloc); REGISTER_API(Calloc); + REGISTER_API(TryCalloc); REGISTER_API(Realloc); + REGISTER_API(TryRealloc); REGISTER_API(Free); REGISTER_API(Strdup); REGISTER_API(CreateCommand); @@ -13505,6 +13658,7 @@ void moduleRegisterCoreAPI(void) { REGISTER_API(CreateSubcommand); REGISTER_API(SetCommandInfo); REGISTER_API(SetCommandACLCategories); + REGISTER_API(AddACLCategory); REGISTER_API(SetModuleAttribs); REGISTER_API(IsModuleNameBusy); REGISTER_API(WrongArity); @@ -13723,6 +13877,8 @@ void moduleRegisterCoreAPI(void) { REGISTER_API(SetDisconnectCallback); REGISTER_API(GetBlockedClientHandle); REGISTER_API(SetClusterFlags); + REGISTER_API(ClusterKeySlot); + REGISTER_API(ClusterCanonicalKeyNameInSlot); REGISTER_API(CreateDict); REGISTER_API(FreeDict); REGISTER_API(DictSize); diff --git a/src/modules/helloblock.c b/src/modules/helloblock.c index dc3d74975f9..a956c78cebd 100644 --- a/src/modules/helloblock.c +++ b/src/modules/helloblock.c @@ -3,32 +3,11 @@ * * 
----------------------------------------------------------------------------- * - * Copyright (c) 2016, Salvatore Sanfilippo + * Copyright (c) 2016-Present, Redis Ltd. * All rights reserved. * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Redis nor the names of its contributors may be used - * to endorse or promote products derived from this software without - * specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. + * Licensed under your choice of the Redis Source Available License 2.0 + * (RSALv2) or the Server Side Public License v1 (SSPLv1). 
*/ #include "../redismodule.h" diff --git a/src/modules/hellocluster.c b/src/modules/hellocluster.c index bc145c2b225..95f468cbe07 100644 --- a/src/modules/hellocluster.c +++ b/src/modules/hellocluster.c @@ -2,32 +2,11 @@ * * ----------------------------------------------------------------------------- * - * Copyright (c) 2018, Salvatore Sanfilippo + * Copyright (c) 2018-Present, Redis Ltd. * All rights reserved. * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Redis nor the names of its contributors may be used - * to endorse or promote products derived from this software without - * specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. 
+ * Licensed under your choice of the Redis Source Available License 2.0 + * (RSALv2) or the Server Side Public License v1 (SSPLv1). */ #include "../redismodule.h" diff --git a/src/modules/hellodict.c b/src/modules/hellodict.c index 12b6e91d254..607c3e31044 100644 --- a/src/modules/hellodict.c +++ b/src/modules/hellodict.c @@ -5,32 +5,11 @@ * * ----------------------------------------------------------------------------- * - * Copyright (c) 2018, Salvatore Sanfilippo + * Copyright (c) 2018-Present, Redis Ltd. * All rights reserved. * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Redis nor the names of its contributors may be used - * to endorse or promote products derived from this software without - * specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. + * Licensed under your choice of the Redis Source Available License 2.0 + * (RSALv2) or the Server Side Public License v1 (SSPLv1). */ #include "../redismodule.h" diff --git a/src/modules/hellohook.c b/src/modules/hellohook.c index 2859a8b26a2..836f43a9bec 100644 --- a/src/modules/hellohook.c +++ b/src/modules/hellohook.c @@ -2,32 +2,11 @@ * * ----------------------------------------------------------------------------- * - * Copyright (c) 2019, Salvatore Sanfilippo + * Copyright (c) 2019-Present, Redis Ltd. * All rights reserved. * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Redis nor the names of its contributors may be used - * to endorse or promote products derived from this software without - * specific prior written permission. 
- * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. + * Licensed under your choice of the Redis Source Available License 2.0 + * (RSALv2) or the Server Side Public License v1 (SSPLv1). */ #include "../redismodule.h" diff --git a/src/modules/hellotimer.c b/src/modules/hellotimer.c index 67e1e671430..b891c41c367 100644 --- a/src/modules/hellotimer.c +++ b/src/modules/hellotimer.c @@ -2,32 +2,11 @@ * * ----------------------------------------------------------------------------- * - * Copyright (c) 2018, Salvatore Sanfilippo + * Copyright (c) 2018-Present, Redis Ltd. * All rights reserved. * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. 
- * * Neither the name of Redis nor the names of its contributors may be used - * to endorse or promote products derived from this software without - * specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. + * Licensed under your choice of the Redis Source Available License 2.0 + * (RSALv2) or the Server Side Public License v1 (SSPLv1). */ #include "../redismodule.h" diff --git a/src/modules/hellotype.c b/src/modules/hellotype.c index 1dc53d24c16..16343aa4e54 100644 --- a/src/modules/hellotype.c +++ b/src/modules/hellotype.c @@ -7,32 +7,11 @@ * * ----------------------------------------------------------------------------- * - * Copyright (c) 2016, Salvatore Sanfilippo + * Copyright (c) 2016-Present, Redis Ltd. * All rights reserved. * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. 
- * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Redis nor the names of its contributors may be used - * to endorse or promote products derived from this software without - * specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. + * Licensed under your choice of the Redis Source Available License 2.0 + * (RSALv2) or the Server Side Public License v1 (SSPLv1). */ #include "../redismodule.h" diff --git a/src/modules/helloworld.c b/src/modules/helloworld.c index e5179631018..cdbc8c89c37 100644 --- a/src/modules/helloworld.c +++ b/src/modules/helloworld.c @@ -6,32 +6,11 @@ * * ----------------------------------------------------------------------------- * - * Copyright (c) 2016, Salvatore Sanfilippo + * Copyright (c) 2016-Present, Redis Ltd. * All rights reserved. 
* - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Redis nor the names of its contributors may be used - * to endorse or promote products derived from this software without - * specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. + * Licensed under your choice of the Redis Source Available License 2.0 + * (RSALv2) or the Server Side Public License v1 (SSPLv1). */ #include "../redismodule.h" diff --git a/src/monotonic.c b/src/monotonic.c index 1d71962f304..6da03677bda 100644 --- a/src/monotonic.c +++ b/src/monotonic.c @@ -3,10 +3,7 @@ #include #include #include - -#undef NDEBUG -#include - +#include "redisassert.h" /* The function pointer for clock retrieval. 
*/ monotime (*getMonotonicUs)(void) = NULL; diff --git a/src/mstr.c b/src/mstr.c new file mode 100644 index 00000000000..39200d7314b --- /dev/null +++ b/src/mstr.c @@ -0,0 +1,524 @@ +/* + * Copyright Redis Ltd. 2024 - present + * + * Licensed under your choice of the Redis Source Available License 2.0 (RSALv2) + * or the Server Side Public License v1 (SSPLv1). + */ + +#include +#include +#include "sdsalloc.h" +#include "mstr.h" +#include "stdio.h" + +#define NULL_SIZE 1 + +static inline char mstrReqType(size_t string_size); +static inline int mstrHdrSize(char type); +static inline int mstrSumMetaLen(mstrKind *k, mstrFlags flags); +static inline size_t mstrAllocLen(const mstr s, struct mstrKind *kind); + +/*** mstr API ***/ + +/* Create mstr without any metadata attached, based on string 'initStr'. + * - If initStr equals NULL, then only allocation will be made. + * - string of mstr is always null-terminated. + */ +mstr mstrNew(const char *initStr, size_t lenStr, int trymalloc) { + unsigned char *pInfo; /* pointer to mstr info field */ + void *sh; + mstr s; + char type = mstrReqType(lenStr); + int mstrHdr = mstrHdrSize(type); + + assert(lenStr + mstrHdr + 1 > lenStr); /* Catch size_t overflow */ + + size_t len = mstrHdr + lenStr + NULL_SIZE; + sh = trymalloc? 
s_trymalloc(len) : s_malloc(len); + + if (sh == NULL) return NULL; + + s = (char*)sh + mstrHdr; + pInfo = ((unsigned char*)s) - 1; + + switch(type) { + case MSTR_TYPE_5: { + *pInfo = CREATE_MSTR_INFO(lenStr, 0 /*ismeta*/, type); + break; + } + case MSTR_TYPE_8: { + MSTR_HDR_VAR(8,s); + *pInfo = CREATE_MSTR_INFO(0 /*unused*/, 0 /*ismeta*/, type); + sh->len = lenStr; + break; + } + case MSTR_TYPE_16: { + MSTR_HDR_VAR(16,s); + *pInfo = CREATE_MSTR_INFO(0 /*unused*/, 0 /*ismeta*/, type); + sh->len = lenStr; + break; + } + case MSTR_TYPE_64: { + MSTR_HDR_VAR(64,s); + *pInfo = CREATE_MSTR_INFO(0 /*unused*/, 0 /*ismeta*/, type); + sh->len = lenStr; + break; + } + } + + if (initStr && lenStr) + memcpy(s, initStr, lenStr); + + s[lenStr] = '\0'; + return s; +} + +/* Creates mstr with given string. Reserve space for metadata. + * + * Note: mstrNew(s,l) and mstrNewWithMeta(s,l,0) are not the same. The first allocates + * just string. The second allocates a string with flags (yet without any metadata + * structures allocated). + */ +mstr mstrNewWithMeta(struct mstrKind *kind, const char *initStr, size_t lenStr, mstrFlags metaFlags, int trymalloc) { + unsigned char *pInfo; /* pointer to mstr info field */ + char *allocMstr; + mstr mstrPtr; + char type = mstrReqType(lenStr); + int mstrHdr = mstrHdrSize(type); + int sumMetaLen = mstrSumMetaLen(kind, metaFlags); + + + /* mstrSumMetaLen() + sizeof(mstrFlags) + sizeof(mstrhdrX) + lenStr */ + + size_t allocLen = sumMetaLen + sizeof(mstrFlags) + mstrHdr + lenStr + NULL_SIZE; + allocMstr = trymalloc? 
s_trymalloc(allocLen) : s_malloc(allocLen); + + if (allocMstr == NULL) return NULL; + + /* metadata is located at the beginning of the allocation, then meta-flags and lastly the string */ + mstrFlags *pMetaFlags = (mstrFlags *) (allocMstr + sumMetaLen) ; + mstrPtr = ((char*) pMetaFlags) + sizeof(mstrFlags) + mstrHdr; + pInfo = ((unsigned char*)mstrPtr) - 1; + + switch(type) { + case MSTR_TYPE_5: { + *pInfo = CREATE_MSTR_INFO(lenStr, 1 /*ismeta*/, type); + break; + } + case MSTR_TYPE_8: { + MSTR_HDR_VAR(8, mstrPtr); + sh->len = lenStr; + *pInfo = CREATE_MSTR_INFO(0 /*unused*/, 1 /*ismeta*/, type); + break; + } + case MSTR_TYPE_16: { + MSTR_HDR_VAR(16, mstrPtr); + sh->len = lenStr; + *pInfo = CREATE_MSTR_INFO(0 /*unused*/, 1 /*ismeta*/, type); + break; + } + case MSTR_TYPE_64: { + MSTR_HDR_VAR(64, mstrPtr); + sh->len = lenStr; + *pInfo = CREATE_MSTR_INFO(0 /*unused*/, 1 /*ismeta*/, type); + break; + } + } + *pMetaFlags = metaFlags; + if (initStr != NULL) memcpy(mstrPtr, initStr, lenStr); + mstrPtr[lenStr] = '\0'; + + return mstrPtr; +} + +/* Create copy of mstr. Flags can be modified. For each metadata flag, if + * same flag is set on both, then copy its metadata. 
*/ +mstr mstrNewCopy(struct mstrKind *kind, mstr src, mstrFlags newFlags) { + mstr dst; + + /* if no flags are set, then just copy the string */ + if (newFlags == 0) return mstrNew(src, mstrlen(src), 0); + + dst = mstrNewWithMeta(kind, src, mstrlen(src), newFlags, 0); + memcpy(dst, src, mstrlen(src) + 1); + + /* if metadata is attached to src, then selectively copy metadata */ + if (mstrIsMetaAttached(src)) { + mstrFlags *pFlags1 = mstrFlagsRef(src), + *pFlags2 = mstrFlagsRef(dst); + + mstrFlags flags1Shift = *pFlags1, + flags2Shift = *pFlags2; + + unsigned char *at1 = ((unsigned char *) pFlags1), + *at2 = ((unsigned char *) pFlags2); + + /* if the flag is set on both, then copy the metadata */ + for (int i = 0; flags1Shift != 0; ++i) { + int isFlag1Set = flags1Shift & 0x1; + int isFlag2Set = flags2Shift & 0x1; + + if (isFlag1Set) at1 -= kind->metaSize[i]; + if (isFlag2Set) at2 -= kind->metaSize[i]; + + if (isFlag1Set && isFlag2Set) + memcpy(at2, at1, kind->metaSize[i]); + flags1Shift >>= 1; + flags2Shift >>= 1; + } + } + return dst; +} + +/* Free mstring. Note, mstrKind is required to eval sizeof metadata and find start + * of allocation but if mstrIsMetaAttached(s) is false, you can pass NULL as well. + */ +void mstrFree(struct mstrKind *kind, mstr s) { + if (s != NULL) + s_free(mstrGetAllocPtr(kind, s)); +} + +/* return ref to metadata flags. Useful to modify directly flags which doesn't + * include metadata payload */ +mstrFlags *mstrFlagsRef(mstr s) { + switch(s[-1]&MSTR_TYPE_MASK) { + case MSTR_TYPE_5: + return ((mstrFlags *) (s - sizeof(struct mstrhdr5))) - 1; + case MSTR_TYPE_8: + return ((mstrFlags *) (s - sizeof(struct mstrhdr8))) - 1; + case MSTR_TYPE_16: + return ((mstrFlags *) (s - sizeof(struct mstrhdr16))) - 1; + default: /* MSTR_TYPE_64: */ + return ((mstrFlags *) (s - sizeof(struct mstrhdr64))) - 1; + } +} + +/* Return a reference to corresponding metadata of the specified metadata flag + * index (flagIdx). 
If the metadata doesn't exist, it still returns a reference + * to the starting location where it would have been written among other metadatas. + * To verify if `flagIdx` of some metadata is attached, use `mstrGetFlag(s, flagIdx)`. + */ +void *mstrMetaRef(mstr s, struct mstrKind *kind, int flagIdx) { + int metaOffset = 0; + /* start iterating from flags backward */ + mstrFlags *pFlags = mstrFlagsRef(s); + mstrFlags tmp = *pFlags; + + for (int i = 0 ; i <= flagIdx ; ++i) { + if (tmp & 0x1) metaOffset += kind->metaSize[i]; + tmp >>= 1; + } + return ((char *)pFlags) - metaOffset; +} + +/* mstr layout: [meta-data#N]...[meta-data#0][mstrFlags][mstrhdr][string][null] */ +void *mstrGetAllocPtr(struct mstrKind *kind, mstr str) { + if (!mstrIsMetaAttached(str)) + return (char*)str - mstrHdrSize(str[-1]); + + int totalMetaLen = mstrSumMetaLen(kind, *mstrFlagsRef(str)); + return (char*)str - mstrHdrSize(str[-1]) - sizeof(mstrFlags) - totalMetaLen; +} + +/* Prints in the following fashion: + * [0x7f8bd8816017] my_mstr: foo (strLen=3, mstrLen=11, isMeta=1, metaFlags=0x1) + * [0x7f8bd8816010] >> meta[0]: 0x78 0x56 0x34 0x12 (metaLen=4) + */ +void mstrPrint(mstr s, struct mstrKind *kind, int verbose) { + mstrFlags mflags, tmp; + int isMeta = mstrIsMetaAttached(s); + + tmp = mflags = (isMeta) ? 
*mstrFlagsRef(s) : 0; + + if (!isMeta) { + printf("[%p] %s: %s (strLen=%zu, mstrLen=%zu, isMeta=0)\n", + (void *)s, kind->name, s, mstrlen(s), mstrAllocLen(s, kind)); + return; + } + + printf("[%p] %s: %s (strLen=%zu, mstrLen=%zu, isMeta=1, metaFlags=0x%x)\n", + (void *)s, kind->name, s, mstrlen(s), mstrAllocLen(s, kind), mflags); + + if (verbose) { + for (unsigned int i = 0 ; i < NUM_MSTR_FLAGS ; ++i) { + if (tmp & 0x1) { + int mSize = kind->metaSize[i]; + void *mRef = mstrMetaRef(s, kind, i); + printf("[%p] >> meta[%d]:", mRef, i); + for (int j = 0 ; j < mSize ; ++j) { + printf(" 0x%02x", ((unsigned char *) mRef)[j]); + } + printf(" (metaLen=%d)\n", mSize); + } + tmp >>= 1; + } + } +} + +/* return length of the string (ignoring metadata attached) */ +size_t mstrlen(const mstr s) { + unsigned char info = s[-1]; + switch(info & MSTR_TYPE_MASK) { + case MSTR_TYPE_5: + return MSTR_TYPE_5_LEN(info); + case MSTR_TYPE_8: + return MSTR_HDR(8,s)->len; + case MSTR_TYPE_16: + return MSTR_HDR(16,s)->len; + default: /* MSTR_TYPE_64: */ + return MSTR_HDR(64,s)->len; + } +} + +/*** mstr internals ***/ + +static inline int mstrSumMetaLen(mstrKind *k, mstrFlags flags) { + int total = 0; + int i = 0 ; + while (flags) { + total += (flags & 0x1) ? 
k->metaSize[i] : 0; + flags >>= 1; + ++i; + } + return total; +} + +/* mstrSumMetaLen() + sizeof(mstrFlags) + sizeof(mstrhdrX) + strlen + '\0' */ +static inline size_t mstrAllocLen(const mstr s, struct mstrKind *kind) { + int hdrlen; + mstrFlags *pMetaFlags; + size_t strlen = 0; + + int isMeta = mstrIsMetaAttached(s); + unsigned char info = s[-1]; + + switch(info & MSTR_TYPE_MASK) { + case MSTR_TYPE_5: + strlen = MSTR_TYPE_5_LEN(info); + hdrlen = sizeof(struct mstrhdr5); + pMetaFlags = ((mstrFlags *) MSTR_HDR(5, s)) - 1; + break; + case MSTR_TYPE_8: + strlen = MSTR_HDR(8,s)->len; + hdrlen = sizeof(struct mstrhdr8); + pMetaFlags = ((mstrFlags *) MSTR_HDR(8, s)) - 1; + break; + case MSTR_TYPE_16: + strlen = MSTR_HDR(16,s)->len; + hdrlen = sizeof(struct mstrhdr16); + pMetaFlags = ((mstrFlags *) MSTR_HDR(16, s)) - 1; + break; + default: /* MSTR_TYPE_64: */ + strlen = MSTR_HDR(64,s)->len; + hdrlen = sizeof(struct mstrhdr64); + pMetaFlags = ((mstrFlags *) MSTR_HDR(64, s)) - 1; + break; + } + return hdrlen + strlen + NULL_SIZE + ((isMeta) ? (mstrSumMetaLen(kind, *pMetaFlags) + sizeof(mstrFlags)) : 0); +} + +/* returns pointer to the beginning of malloc() of mstr */ +void *mstrGetStartAlloc(mstr s, struct mstrKind *kind) { + int hdrlen; + mstrFlags *pMetaFlags; + + int isMeta = mstrIsMetaAttached(s); + + switch(s[-1]&MSTR_TYPE_MASK) { + case MSTR_TYPE_5: + hdrlen = sizeof(struct mstrhdr5); + pMetaFlags = ((mstrFlags *) MSTR_HDR(5, s)) - 1; + break; + case MSTR_TYPE_8: + hdrlen = sizeof(struct mstrhdr8); + pMetaFlags = ((mstrFlags *) MSTR_HDR(8, s)) - 1; + break; + case MSTR_TYPE_16: + hdrlen = sizeof(struct mstrhdr16); + pMetaFlags = ((mstrFlags *) MSTR_HDR(16, s)) - 1; + break; + default: /* MSTR_TYPE_64: */ + hdrlen = sizeof(struct mstrhdr64); + pMetaFlags = ((mstrFlags *) MSTR_HDR(64, s)) - 1; + break; + } + return (char *) s - hdrlen - ((isMeta) ? 
(mstrSumMetaLen(kind, *pMetaFlags) + sizeof(mstrFlags)) : 0); +} + +static inline int mstrHdrSize(char type) { + switch(type&MSTR_TYPE_MASK) { + case MSTR_TYPE_5: + return sizeof(struct mstrhdr5); + case MSTR_TYPE_8: + return sizeof(struct mstrhdr8); + case MSTR_TYPE_16: + return sizeof(struct mstrhdr16); + case MSTR_TYPE_64: + return sizeof(struct mstrhdr64); + } + return 0; +} + +static inline char mstrReqType(size_t string_size) { + if (string_size < 1<<5) + return MSTR_TYPE_5; + if (string_size < 1<<8) + return MSTR_TYPE_8; + if (string_size < 1<<16) + return MSTR_TYPE_16; + return MSTR_TYPE_64; +} + +#ifdef REDIS_TEST +#include +#include +#include "testhelp.h" +#include "limits.h" + +#ifndef UNUSED +#define UNUSED(x) (void)(x) +#endif + +/* Challenge mstr with metadata interesting enough that can include the case of hfield and hkey and more */ +#define B(idx) (1<<(idx)) + +#define META_IDX_MYMSTR_TTL4 0 +#define META_IDX_MYMSTR_TTL8 1 +#define META_IDX_MYMSTR_TYPE_ENC_LRU 2 // 4Bbit type, 4bit encoding, 24bits lru +#define META_IDX_MYMSTR_VALUE_PTR 3 +#define META_IDX_MYMSTR_FLAG_NO_META 4 + +#define TEST_CONTEXT(context) printf("\nContext: %s \n", context); + +int mstrTest(int argc, char **argv, int flags) { + UNUSED(argc); + UNUSED(argv); + UNUSED(flags); + + struct mstrKind kind_mymstr = { + .name = "my_mstr", + .metaSize[META_IDX_MYMSTR_TTL4] = 4, + .metaSize[META_IDX_MYMSTR_TTL8] = 8, + .metaSize[META_IDX_MYMSTR_TYPE_ENC_LRU] = 4, + .metaSize[META_IDX_MYMSTR_VALUE_PTR] = 8, + .metaSize[META_IDX_MYMSTR_FLAG_NO_META] = 0, + }; + + TEST_CONTEXT("Create simple short mstr") + { + char *str = "foo"; + mstr s = mstrNew(str, strlen(str), 0); + size_t expStrLen = strlen(str); + + test_cond("Verify str length and alloc length", + mstrAllocLen(s, NULL) == (1 + expStrLen + 1) && /* mstrhdr5 + str + null */ + mstrlen(s) == expStrLen && /* expected strlen(str) */ + memcmp(s, str, expStrLen + 1) == 0); + mstrFree(&kind_mymstr, s); + } + + TEST_CONTEXT("Create simple 40 
bytes mstr") + { + char *str = "0123456789012345678901234567890123456789"; // 40 bytes + mstr s = mstrNew(str, strlen(str), 0); + + test_cond("Verify str length and alloc length", + mstrAllocLen(s, NULL) == (3 + 40 + 1) && /* mstrhdr8 + str + null */ + mstrlen(s) == 40 && + memcmp(s,str,40) == 0); + mstrFree(&kind_mymstr, s); + } + + TEST_CONTEXT("Create mstr with random characters") + { + long unsigned int i; + char str[66000]; + for (i = 0 ; i < sizeof(str) ; ++i) str[i] = rand() % 256; + + size_t len[] = { 31, 32, 33, 255, 256, 257, 65535, 65536, 65537, 66000}; + for (i = 0 ; i < sizeof(len) / sizeof(len[0]) ; ++i) { + char title[100]; + mstr s = mstrNew(str, len[i], 0); + size_t mstrhdrSize = (len[i] < 1<<5) ? sizeof(struct mstrhdr5) : + (len[i] < 1<<8) ? sizeof(struct mstrhdr8) : + (len[i] < 1<<16) ? sizeof(struct mstrhdr16) : + sizeof(struct mstrhdr64); + + snprintf(title, sizeof(title), "Verify string of length %zu", len[i]); + test_cond(title, + mstrAllocLen(s, NULL) == (mstrhdrSize + len[i] + 1) && /* mstrhdrX + str + null */ + mstrlen(s) == len[i] && + memcmp(s,str,len[i]) == 0); + mstrFree(&kind_mymstr, s); + } + } + + TEST_CONTEXT("Create short mstr with TTL4") + { + uint32_t *ttl; + mstr s = mstrNewWithMeta(&kind_mymstr, + "foo", + strlen("foo"), + B(META_IDX_MYMSTR_TTL4), /* allocate with TTL4 metadata */ + 0); + + ttl = mstrMetaRef(s, &kind_mymstr, META_IDX_MYMSTR_TTL4); + *ttl = 0x12345678; + + test_cond("Verify memory-allocation and string lengths", + mstrAllocLen(s, &kind_mymstr) == (1 + 3 + 2 + 1 + 4) && /* mstrhdr5 + str + null + mstrFlags + TLL */ + mstrlen(s) == 3); + + unsigned char expMem[] = {0xFF, 0xFF, 0xFF, 0xFF, 0x01, 0x00, 0x1c, 'f', 'o', 'o', '\0' }; + uint32_t value = 0x12345678; + memcpy(expMem, &value, sizeof(uint32_t)); + test_cond("Verify string and TTL4 payload", memcmp( + mstrMetaRef(s, &kind_mymstr, 0) , expMem, sizeof(expMem)) == 0); + + test_cond("Verify mstrIsMetaAttached() function works", mstrIsMetaAttached(s) != 0); + + 
mstrFree(&kind_mymstr, s); + } + + TEST_CONTEXT("Create short mstr with TTL4 and value ptr ") + { + mstr s = mstrNewWithMeta(&kind_mymstr, "foo", strlen("foo"), + B(META_IDX_MYMSTR_TTL4) | B(META_IDX_MYMSTR_VALUE_PTR), 0); + *((uint32_t *) (mstrMetaRef(s, &kind_mymstr, + META_IDX_MYMSTR_TTL4))) = 0x12345678; + + test_cond("Verify length and alloc length", + mstrAllocLen(s, &kind_mymstr) == (1 + 3 + 1 + 2 + 4 + 8) && /* mstrhdr5 + str + null + mstrFlags + TLL + PTR */ + mstrlen(s) == 3); + mstrFree(&kind_mymstr, s); + } + + TEST_CONTEXT("Copy mstr and add it TTL4") + { + mstr s1 = mstrNew("foo", strlen("foo"), 0); + mstr s2 = mstrNewCopy(&kind_mymstr, s1, B(META_IDX_MYMSTR_TTL4)); + *((uint32_t *) (mstrMetaRef(s2, &kind_mymstr, META_IDX_MYMSTR_TTL4))) = 0x12345678; + + test_cond("Verify new mstr includes TTL4", + mstrAllocLen(s2, &kind_mymstr) == (1 + 3 + 1 + 2 + 4) && /* mstrhdr5 + str + null + mstrFlags + TTL4 */ + mstrlen(s2) == 3 && /* 'foo' = 3bytes */ + memcmp(s2, "foo\0", 4) == 0); + + mstr s3 = mstrNewCopy(&kind_mymstr, s2, B(META_IDX_MYMSTR_TTL4)); + unsigned char expMem[] = { 0xFF, 0xFF, 0xFF, 0xFF, 0x1, 0x0, 0x1c, 'f', 'o', 'o', '\0' }; + uint32_t value = 0x12345678; + memcpy(expMem, &value, sizeof(uint32_t)); + + char *ppp = mstrGetStartAlloc(s3, &kind_mymstr); + test_cond("Verify string and TTL4 payload", + memcmp(ppp, expMem, sizeof(expMem)) == 0); + + mstrPrint(s3, &kind_mymstr, 1); + mstrFree(&kind_mymstr, s1); + mstrFree(&kind_mymstr, s2); + mstrFree(&kind_mymstr, s3); + } + + return 0; +} +#endif diff --git a/src/mstr.h b/src/mstr.h new file mode 100644 index 00000000000..1613a637ec6 --- /dev/null +++ b/src/mstr.h @@ -0,0 +1,226 @@ +/* + * Copyright Redis Ltd. 2024 - present + * + * Licensed under your choice of the Redis Source Available License 2.0 (RSALv2) + * or the Server Side Public License v1 (SSPLv1). + * + * + * WHAT IS MSTR (M-STRING)? + * ------------------------ + * mstr stands for immutable string with optional metadata attached. 
+ * + * sds string is widely used across the system and serves as a general purpose + * container to hold data. The need to optimize memory and aggregate strings + * along with metadata and store it into Redis data-structures as single bulk keep + * reoccur. One thought might be, why not to extend sds to support metadata. The + * answer is that sds is mutable string in its nature, with wide API (split, join, + * etc.). Pushing metadata logic into sds will make it very fragile, and complex + * to maintain. + * + * Another idea involved using a simple struct with flags and a dynamic buf[] at the + * end. While this could be viable, it introduces considerable complexity and would + * need maintenance across different contexts. + * + * As an alternative, we introduce a new implementation of immutable strings, + * with limited API, and with the option to attach metadata. The representation + * of the string, without any metadata, in its basic form, resembles SDS but + * without the API to manipulate the string. Only to attach metadata to it. The + * following diagram shows the memory layout of mstring (mstrhdr8) when no + * metadata is attached: + * + * +----------------------------------------------+ + * | mstrhdr8 | c-string | | + * +--------------------------------+-------------+ + * |8b |2b |1b |5b |?bytes |8b| + * | Len | Type |m-bit=0 | Unused | String |\0| + * +----------------------------------------------+ + * ^ + * | + * mstrNew() returns pointer to here --+ + * + * If metadata-flag is set, depicted in diagram above as m-bit in the diagram, + * then the header will be preceded with additional 16 bits of metadata flags such + * that if i'th bit is set, then the i'th metadata structure is attached to the + * mstring. The metadata layout and their sizes are defined by mstrKind structure + * (More below). 
+ * + * The following diagram shows the memory layout of mstr (mstrhdr8) when 3 bits in mFlags + * are set to indicate that 3 fields of metadata are attached to the mstring at the + * beginning. + * + * +-------------------------------------------------------------------------------+ + * | METADATA FIELDS | mflags | mstrhdr8 | c-string | | + * +-----------------------+--------+--------------------------------+-------------+ + * |?bytes |?bytes |?bytes |16b |8b |2b |1b |5b |?bytes |8b| + * | Meta3 | Meta2 | Meta0 | 0x1101 | Len | Type |m-bit=1 | Unused | String |\0| + * +-------------------------------------------------------------------------------+ + * ^ + * | + * mstrNewWithMeta() returns pointer to here --+ + * + * mstr allows to define different kinds (groups) of mstrings, each with its + * own unique metadata layout. For example, in case of hash-fields, all instances of + * it can optionally have TTL metadata attached to it. This is achieved by first + * prototyping a single mstrKind structure that defines the metadata layout and sizes + * of this specific kind. Now each hash-field instance has still the freedom to + * attach or not attach the metadata to it, and metadata flags (mFlags) of the + * instance will reflect this decision. + * + * In the future, the keys of Redis keyspace can be another kind of mstring that + * has TTL, LRU or even dictEntry metadata embedded into. Unlike vptr in c++, this + * struct won't be attached to mstring but will be passed as yet another argument + * to API, to save memory. In addition, each instance of a given mstrkind can hold + * any subset of metadata and the 8 bits of metadata-flags will reflect it. + * + * The following example shows how to define mstrKind for possible future keyspace + * that aggregates several keyspace related metadata into one compact, singly + * allocated, mstring. 
+ * + * typedef enum HkeyMetaFlags { + * HKEY_META_VAL_REF_COUNT = 0, // refcount + * HKEY_META_VAL_REF = 1, // Val referenced + * HKEY_META_EXPIRE = 2, // TTL and more + * HKEY_META_TYPE_ENC_LRU = 3, // TYPE + LRU + ENC + * HKEY_META_DICT_ENT_NEXT = 4, // Next dict entry + * // Following two must be together and in this order + * HKEY_META_VAL_EMBED8 = 5, // Val embedded, max 7 bytes + * HKEY_META_VAL_EMBED16 = 6, // Val embedded, max 15 bytes (23 with EMBED8) + * } HkeyMetaFlags; + * + * mstrKind hkeyKind = { + * .name = "hkey", + * .metaSize[HKEY_META_VAL_REF_COUNT] = 4, + * .metaSize[HKEY_META_VAL_REF] = 8, + * .metaSize[HKEY_META_EXPIRE] = sizeof(ExpireMeta), + * .metaSize[HKEY_META_TYPE_ENC_LRU] = 8, + * .metaSize[HKEY_META_DICT_ENT_NEXT] = 8, + * .metaSize[HKEY_META_VAL_EMBED8] = 8, + * .metaSize[HKEY_META_VAL_EMBED16] = 16, + * }; + * + * MSTR-ALIGNMENT + * -------------- + * There are two types of alignments to take into consideration: + * 1. Alignment of the metadata. + * 2. Alignment of returned mstr pointer + * + * 1) As the metadatas layout are reversed to their enumeration, it is recommended + * to put metadata with "better" alignment first in memory layout (enumerated + * last) and the worst, or those that simply don't require any alignment will be + * last in memory layout (enumerated first). This is similar the to the applied + * consideration when defining new struct in C. Note also that each metadata + * might either be attached to mstr or not which complicates the design phase + * of a new mstrKind a little. + * + * In the example above, HKEY_META_VAL_REF_COUNT, with worst alignment of 4 + * bytes, is enumerated first, and therefore, will be last in memory layout. + * + * 2) Few optimizations in Redis rely on the fact that sds address is always an odd + * pointer. We can achieve the same with a little effort. It was already taken + * care that all headers of type mstrhdrX has odd size. 
With that in mind, if + * a new kind of mstr is required to be limited to odd addresses, then we must + * make sure that sizes of all related metadatas that are defined in mstrKind + * are even in size. + */ + +#ifndef __MSTR_H +#define __MSTR_H + +#include +#include +#include + +/* Selective copy of ifndef from server.h instead of including it */ +#ifndef static_assert +#define static_assert(expr, lit) extern char __static_assert_failure[(expr) ? 1:-1] +#endif + +#define MSTR_TYPE_5 0 +#define MSTR_TYPE_8 1 +#define MSTR_TYPE_16 2 +#define MSTR_TYPE_64 3 +#define MSTR_TYPE_MASK 3 +#define MSTR_TYPE_BITS 2 + +#define MSTR_META_MASK 4 + +#define MSTR_HDR(T,s) ((struct mstrhdr##T *)((s)-(sizeof(struct mstrhdr##T)))) +#define MSTR_HDR_VAR(T,s) struct mstrhdr##T *sh = (void*)((s)-(sizeof(struct mstrhdr##T))); + +#define MSTR_META_BITS 1 /* is metadata attached? */ +#define MSTR_TYPE_5_LEN(f) ((f) >> (MSTR_TYPE_BITS + MSTR_META_BITS)) +#define CREATE_MSTR_INFO(len, ismeta, type) ( (((len< + * Copyright (c) 2009-Present, Redis Ltd. * All rights reserved. * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Redis nor the names of its contributors may be used - * to endorse or promote products derived from this software without - * specific prior written permission. 
- * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. + * Licensed under your choice of the Redis Source Available License 2.0 + * (RSALv2) or the Server Side Public License v1 (SSPLv1). */ #include "server.h" @@ -303,6 +282,8 @@ void watchForKey(client *c, robj *key) { listNode *ln; watchedKey *wk; + if (listLength(c->watched_keys) == 0) server.watching_clients++; + /* Check if we are already watching for this key */ listRewind(c->watched_keys,&li); while((ln = listNext(&li))) { @@ -353,6 +334,7 @@ void unwatchAllKeys(client *c) { decrRefCount(wk->key); zfree(wk); } + server.watching_clients--; } /* Iterates over the watched_keys list and looks for an expired key. Keys which @@ -394,7 +376,7 @@ void touchWatchedKey(redisDb *db, robj *key) { /* The key was already expired when WATCH was called. */ if (db == wk->db && equalStringObjects(key, wk->key) && - dictFind(db->dict, key->ptr) == NULL) + dbFind(db, key->ptr) == NULL) { /* Already expired key is deleted, so logically no change. Clear * the flag. Deleted keys are not flagged as expired. 
*/ @@ -432,9 +414,9 @@ void touchAllWatchedKeysInDb(redisDb *emptied, redisDb *replaced_with) { dictIterator *di = dictGetSafeIterator(emptied->watched_keys); while((de = dictNext(di)) != NULL) { robj *key = dictGetKey(de); - int exists_in_emptied = dictFind(emptied->dict, key->ptr) != NULL; + int exists_in_emptied = dbFind(emptied, key->ptr) != NULL; if (exists_in_emptied || - (replaced_with && dictFind(replaced_with->dict, key->ptr))) + (replaced_with && dbFind(replaced_with, key->ptr) != NULL)) { list *clients = dictGetVal(de); if (!clients) continue; @@ -442,7 +424,7 @@ void touchAllWatchedKeysInDb(redisDb *emptied, redisDb *replaced_with) { while((ln = listNext(&li))) { watchedKey *wk = redis_member2struct(watchedKey, node, ln); if (wk->expired) { - if (!replaced_with || !dictFind(replaced_with->dict, key->ptr)) { + if (!replaced_with || !dbFind(replaced_with, key->ptr)) { /* Expired key now deleted. No logical change. Clear the * flag. Deleted keys are not flagged as expired. */ wk->expired = 0; diff --git a/src/networking.c b/src/networking.c index 56273fc7e52..be5fa06942b 100644 --- a/src/networking.c +++ b/src/networking.c @@ -1,30 +1,9 @@ /* - * Copyright (c) 2009-2012, Salvatore Sanfilippo + * Copyright (c) 2009-Present, Redis Ltd. * All rights reserved. * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Redis nor the names of its contributors may be used - * to endorse or promote products derived from this software without - * specific prior written permission. 
- * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. + * Licensed under your choice of the Redis Source Available License 2.0 + * (RSALv2) or the Server Side Public License v1 (SSPLv1). */ #include "server.h" @@ -32,6 +11,7 @@ #include "cluster.h" #include "script.h" #include "fpconv_dtoa.h" +#include "fmtargs.h" #include #include #include @@ -51,6 +31,14 @@ size_t sdsZmallocSize(sds s) { return zmalloc_size(sh); } +/* Return the size consumed from the allocator, for the specified hfield with + * metadata (mstr), including internal fragmentation. This function is used in + * order to compute the client output buffer size. */ +size_t hfieldZmallocSize(hfield s) { + void *sh = hfieldGetAllocPtr(s); + return zmalloc_size(sh); +} + /* Return the amount of memory used by the sds string at object->ptr * for a string object. This includes internal fragmentation. */ size_t getStringObjectSdsUsedMemory(robj *o) { @@ -413,8 +401,9 @@ void _addReplyToBufferOrList(client *c, const char *s, size_t len) { * to a channel which we are subscribed to, then we wanna postpone that message to be added * after the command's reply (specifically important during multi-exec). 
the exception is * the SUBSCRIBE command family, which (currently) have a push message instead of a proper reply. - * The check for executing_client also avoids affecting push messages that are part of eviction. */ - if (c == server.current_client && (c->flags & CLIENT_PUSHING) && + * The check for executing_client also avoids affecting push messages that are part of eviction. + * Check CLIENT_PUSHING first to avoid race conditions, as it's absent in module's fake client. */ + if ((c->flags & CLIENT_PUSHING) && c == server.current_client && server.executing_client && !cmdHasPushAsReply(server.executing_client->cmd)) { _addReplyProtoToList(c,server.pending_push_messages,s,len); @@ -1116,14 +1105,18 @@ void addReplyVerbatim(client *c, const char *s, size_t len, const char *ext) { } } -/* Add an array of C strings as status replies with a heading. - * This function is typically invoked by from commands that support - * subcommands in response to the 'help' subcommand. The help array - * is terminated by NULL sentinel. */ -void addReplyHelp(client *c, const char **help) { +/* This function is similar to the addReplyHelp function but adds the + * ability to pass in two arrays of strings. Some commands have + * some additional subcommands based on the specific feature implementation + * Redis is compiled with (currently just clustering). This function allows + * to pass is the common subcommands in `help` and any implementation + * specific subcommands in `extended_help`. 
+ */ +void addExtendedReplyHelp(client *c, const char **help, const char **extended_help) { sds cmd = sdsnew((char*) c->argv[0]->ptr); void *blenp = addReplyDeferredLen(c); int blen = 0; + int idx = 0; sdstoupper(cmd); addReplyStatusFormat(c, @@ -1131,6 +1124,10 @@ void addReplyHelp(client *c, const char **help) { sdsfree(cmd); while (help[blen]) addReplyStatus(c,help[blen++]); + if (extended_help) { + while (extended_help[idx]) addReplyStatus(c,extended_help[idx++]); + } + blen += idx; addReplyStatus(c,"HELP"); addReplyStatus(c," Print this help."); @@ -1140,6 +1137,14 @@ void addReplyHelp(client *c, const char **help) { setDeferredArrayLen(c,blenp,blen); } +/* Add an array of C strings as status replies with a heading. + * This function is typically invoked by commands that support + * subcommands in response to the 'help' subcommand. The help array + * is terminated by NULL sentinel. */ +void addReplyHelp(client *c, const char **help) { + addExtendedReplyHelp(c, help, NULL); +} + /* Add a suggestive error reply. * This function is typically invoked by from commands that support * subcommands in response to an unknown subcommand or argument error. */ @@ -1433,7 +1438,7 @@ void unlinkClient(client *c) { listNode *ln; /* If this is marked as current client unset it. */ - if (server.current_client == c) server.current_client = NULL; + if (c->conn && server.current_client == c) server.current_client = NULL; /* Certain operations must be done only if the client has an active connection. * If the client was already unlinked or if it's a "fake client" the @@ -1477,7 +1482,7 @@ void unlinkClient(client *c) { } /* Remove from the list of pending reads if needed. 
*/ - serverAssert(io_threads_op == IO_THREADS_OP_IDLE); + serverAssert(!c->conn || io_threads_op == IO_THREADS_OP_IDLE); if (c->pending_read_list_node != NULL) { listDelNode(server.clients_pending_read,c->pending_read_list_node); c->pending_read_list_node = NULL; @@ -1529,6 +1534,7 @@ void clearClientConnectionState(client *c) { pubsubUnsubscribeAllChannels(c,0); pubsubUnsubscribeShardAllChannels(c, 0); pubsubUnsubscribeAllPatterns(c,0); + unmarkClientAsPubSub(c); if (c->name) { decrRefCount(c->name); @@ -1539,10 +1545,22 @@ void clearClientConnectionState(client *c) { * represent the client library behind the connection. */ /* Selectively clear state flags not covered above */ - c->flags &= ~(CLIENT_ASKING|CLIENT_READONLY|CLIENT_PUBSUB|CLIENT_REPLY_OFF| + c->flags &= ~(CLIENT_ASKING|CLIENT_READONLY|CLIENT_REPLY_OFF| CLIENT_REPLY_SKIP_NEXT|CLIENT_NO_TOUCH|CLIENT_NO_EVICT); } +void deauthenticateAndCloseClient(client *c) { + c->user = DefaultUser; + c->authenticated = 0; + /* We will write replies to this client later, so we can't + * close it directly even if async. */ + if (c == server.current_client) { + c->flags |= CLIENT_CLOSE_AFTER_COMMAND; + } else { + freeClientAsync(c); + } +} + void freeClient(client *c) { listNode *ln; @@ -1614,6 +1632,7 @@ void freeClient(client *c) { pubsubUnsubscribeAllChannels(c,0); pubsubUnsubscribeShardAllChannels(c, 0); pubsubUnsubscribeAllPatterns(c,0); + unmarkClientAsPubSub(c); dictRelease(c->pubsub_channels); dictRelease(c->pubsub_patterns); dictRelease(c->pubsubshard_channels); @@ -1630,6 +1649,12 @@ void freeClient(client *c) { reqresReset(c, 1); #endif + /* Remove the contribution that this client gave to our + * incrementally computed memory usage. */ + if (c->conn) + server.stat_clients_type_memory[c->last_memory_type] -= + c->last_memory_usage; + /* Unlink the client: this will close the socket, remove the I/O * handlers, and remove references of the client from different * places where active clients may be referenced. 
*/ @@ -1678,10 +1703,6 @@ void freeClient(client *c) { * we lost the connection with the master. */ if (c->flags & CLIENT_MASTER) replicationHandleMasterDisconnection(); - /* Remove the contribution that this client gave to our - * incrementally computed memory usage. */ - server.stat_clients_type_memory[c->last_memory_type] -= - c->last_memory_usage; /* Remove client from memory usage buckets */ if (c->mem_usage_bucket) { c->mem_usage_bucket->mem_usage_sum -= c->last_memory_usage; @@ -1700,7 +1721,7 @@ void freeClient(client *c) { zfree(c); } -/* Schedule a client to free it at a safe time in the serverCron() function. +/* Schedule a client to free it at a safe time in the beforeSleep() function. * This function is useful when we need to terminate a client but we are in * a context where calling freeClient() is not possible, because the client * should be valid for the continuation of the flow of the program. */ @@ -1712,6 +1733,9 @@ void freeClientAsync(client *c) { * idle. */ if (c->flags & CLIENT_CLOSE_ASAP || c->flags & CLIENT_SCRIPT) return; c->flags |= CLIENT_CLOSE_ASAP; + /* Replicas that was marked as CLIENT_CLOSE_ASAP should not keep the + * replication backlog from been trimmed. */ + if (c->flags & CLIENT_SLAVE) freeReplicaReferencedReplBuffer(c); if (server.io_threads_num == 1) { /* no need to bother with locking if there's just one thread (the main thread) */ listAddNodeTail(server.clients_to_close,c); @@ -1793,8 +1817,9 @@ int freeClientsInAsyncFreeQueue(void) { * are not registered clients. */ client *lookupClientByID(uint64_t id) { id = htonu64(id); - client *c = raxFind(server.clients_index,(unsigned char*)&id,sizeof(id)); - return (c == raxNotFound) ? 
NULL : c; + void *c = NULL; + raxFind(server.clients_index,(unsigned char*)&id,sizeof(id),&c); + return c; } /* This function should be called from _writeToClient when the reply list is not empty, @@ -2467,7 +2492,7 @@ int processCommandAndResetClient(client *c) { commandProcessed(c); /* Update the client's memory to include output buffer growth following the * processed command. */ - updateClientMemUsageAndBucket(c); + if (c->conn) updateClientMemUsageAndBucket(c); } if (server.current_client == NULL) deadclient = 1; @@ -2697,7 +2722,13 @@ void readQueryFromClient(connection *conn) { atomicIncr(server.stat_net_input_bytes, nread); } - if (!(c->flags & CLIENT_MASTER) && sdslen(c->querybuf) > server.client_max_querybuf_len) { + if (!(c->flags & CLIENT_MASTER) && + /* The commands cached in the MULTI/EXEC queue have not been executed yet, + * so they are also considered a part of the query buffer in a broader sense. + * + * For unauthenticated clients, the query buffer cannot exceed 1MB at most. 
*/ + (c->mstate.argv_len_sums + sdslen(c->querybuf) > server.client_max_querybuf_len || + (c->mstate.argv_len_sums + sdslen(c->querybuf) > 1024*1024 && authRequired(c)))) { sds ci = catClientInfoString(sdsempty(),c), bytes = sdsempty(); bytes = sdscatrepr(bytes,c->querybuf,64); @@ -2705,6 +2736,7 @@ void readQueryFromClient(connection *conn) { sdsfree(ci); sdsfree(bytes); freeClientAsync(c); + atomicIncr(server.stat_client_qbuf_limit_disconnections, 1); goto done; } @@ -2814,39 +2846,38 @@ sds catClientInfoString(sds s, client *client) { used_blocks_of_repl_buf = last->id - cur->id + 1; } - sds ret = sdscatfmt(s, - "id=%U addr=%s laddr=%s %s name=%s age=%I idle=%I flags=%s db=%i sub=%i psub=%i ssub=%i multi=%i qbuf=%U qbuf-free=%U argv-mem=%U multi-mem=%U rbs=%U rbp=%U obl=%U oll=%U omem=%U tot-mem=%U events=%s cmd=%s user=%s redir=%I resp=%i lib-name=%s lib-ver=%s", - (unsigned long long) client->id, - getClientPeerId(client), - getClientSockname(client), - connGetInfo(client->conn, conninfo, sizeof(conninfo)), - client->name ? (char*)client->name->ptr : "", - (long long)(server.unixtime - client->ctime), - (long long)(server.unixtime - client->lastinteraction), - flags, - client->db->id, - (int) dictSize(client->pubsub_channels), - (int) dictSize(client->pubsub_patterns), - (int) dictSize(client->pubsubshard_channels), - (client->flags & CLIENT_MULTI) ? client->mstate.count : -1, - (unsigned long long) sdslen(client->querybuf), - (unsigned long long) sdsavail(client->querybuf), - (unsigned long long) client->argv_len_sum, - (unsigned long long) client->mstate.argv_len_sums, - (unsigned long long) client->buf_usable_size, - (unsigned long long) client->buf_peak, - (unsigned long long) client->bufpos, - (unsigned long long) listLength(client->reply) + used_blocks_of_repl_buf, - (unsigned long long) obufmem, /* should not include client->buf since we want to see 0 for static clients. */ - (unsigned long long) total_mem, - events, - client->lastcmd ? 
client->lastcmd->fullname : "NULL", - client->user ? client->user->name : "(superuser)", - (client->flags & CLIENT_TRACKING) ? (long long) client->client_tracking_redirection : -1, - client->resp, - client->lib_name ? (char*)client->lib_name->ptr : "", - client->lib_ver ? (char*)client->lib_ver->ptr : "" - ); + sds ret = sdscatfmt(s, FMTARGS( + "id=%U", (unsigned long long) client->id, + " addr=%s", getClientPeerId(client), + " laddr=%s", getClientSockname(client), + " %s", connGetInfo(client->conn, conninfo, sizeof(conninfo)), + " name=%s", client->name ? (char*)client->name->ptr : "", + " age=%I", (long long)(commandTimeSnapshot() / 1000 - client->ctime), + " idle=%I", (long long)(server.unixtime - client->lastinteraction), + " flags=%s", flags, + " db=%i", client->db->id, + " sub=%i", (int) dictSize(client->pubsub_channels), + " psub=%i", (int) dictSize(client->pubsub_patterns), + " ssub=%i", (int) dictSize(client->pubsubshard_channels), + " multi=%i", (client->flags & CLIENT_MULTI) ? client->mstate.count : -1, + " watch=%i", (int) listLength(client->watched_keys), + " qbuf=%U", (unsigned long long) sdslen(client->querybuf), + " qbuf-free=%U", (unsigned long long) sdsavail(client->querybuf), + " argv-mem=%U", (unsigned long long) client->argv_len_sum, + " multi-mem=%U", (unsigned long long) client->mstate.argv_len_sums, + " rbs=%U", (unsigned long long) client->buf_usable_size, + " rbp=%U", (unsigned long long) client->buf_peak, + " obl=%U", (unsigned long long) client->bufpos, + " oll=%U", (unsigned long long) listLength(client->reply) + used_blocks_of_repl_buf, + " omem=%U", (unsigned long long) obufmem, /* should not include client->buf since we want to see 0 for static clients. */ + " tot-mem=%U", (unsigned long long) total_mem, + " events=%s", events, + " cmd=%s", client->lastcmd ? client->lastcmd->fullname : "NULL", + " user=%s", client->user ? client->user->name : "(superuser)", + " redir=%I", (client->flags & CLIENT_TRACKING) ? 
(long long) client->client_tracking_redirection : -1, + " resp=%i", client->resp, + " lib-name=%s", client->lib_name ? (char*)client->lib_name->ptr : "", + " lib-ver=%s", client->lib_ver ? (char*)client->lib_ver->ptr : "")); return ret; } @@ -3014,6 +3045,10 @@ void clientCommand(client *c) { " Kill connections authenticated by .", " * SKIPME (YES|NO)", " Skip killing current connection (default: yes).", +" * ID ", +" Kill connections by client id.", +" * MAXAGE ", +" Kill connections older than the specified age.", "LIST [options ...]", " Return information about client connections. Options:", " * TYPE (NORMAL|MASTER|REPLICA|PUBSUB)", @@ -3125,6 +3160,7 @@ NULL user *user = NULL; int type = -1; uint64_t id = 0; + long long max_age = 0; int skipme = 1; int killed = 0, close_this_client = 0; @@ -3146,6 +3182,18 @@ NULL "client-id should be greater than 0") != C_OK) return; id = tmp; + } else if (!strcasecmp(c->argv[i]->ptr,"maxage") && moreargs) { + long long tmp; + + if (getLongLongFromObjectOrReply(c, c->argv[i+1], &tmp, + "maxage is not an integer or out of range") != C_OK) + return; + if (tmp <= 0) { + addReplyError(c, "maxage should be greater than 0"); + return; + } + + max_age = tmp; } else if (!strcasecmp(c->argv[i]->ptr,"type") && moreargs) { type = getClientTypeByName(c->argv[i+1]->ptr); if (type == -1) { @@ -3195,6 +3243,7 @@ NULL if (id != 0 && client->id != id) continue; if (user && client->user != user) continue; if (c == client && skipme) continue; + if (max_age != 0 && (long long)(commandTimeSnapshot() / 1000 - client->ctime) < max_age) continue; /* Kill it. */ if (c == client) { @@ -3723,7 +3772,9 @@ void replaceClientCommandVector(client *c, int argc, robj **argv) { * 1. Make sure there are no "holes" and all the arguments are set. * 2. If the original argument vector was longer than the one we * want to end with, it's up to the caller to set c->argc and - * free the no longer used objects on c->argv. 
*/ + * free the no longer used objects on c->argv. + * 3. To remove argument at i'th index, pass NULL as new value + */ void rewriteClientCommandArgument(client *c, int i, robj *newval) { robj *oldval; retainOriginalCommandVector(c); @@ -3741,9 +3792,18 @@ void rewriteClientCommandArgument(client *c, int i, robj *newval) { } oldval = c->argv[i]; if (oldval) c->argv_len_sum -= getStringObjectLen(oldval); - if (newval) c->argv_len_sum += getStringObjectLen(newval); - c->argv[i] = newval; - incrRefCount(newval); + + if (newval) { + c->argv[i] = newval; + incrRefCount(newval); + c->argv_len_sum += getStringObjectLen(newval); + } else { + /* move the remaining arguments one step left */ + for (int j = i+1; j < c->argc; j++) { + c->argv[j-1] = c->argv[j]; + } + c->argv[--c->argc] = NULL; + } if (oldval) decrRefCount(oldval); /* If this is the command name make sure to fix c->cmd. */ @@ -3808,7 +3868,7 @@ size_t getClientMemoryUsage(client *c, size_t *output_buffer_mem_usage) { * classes of clients. * * The function will return one of the following: - * CLIENT_TYPE_NORMAL -> Normal client + * CLIENT_TYPE_NORMAL -> Normal client, including MONITOR * CLIENT_TYPE_SLAVE -> Slave * CLIENT_TYPE_PUBSUB -> Client subscribed to Pub/Sub channels * CLIENT_TYPE_MASTER -> The client representing our replication master. @@ -3929,6 +3989,7 @@ int closeClientOnOutputBufferLimitReached(client *c, int async) { client); } sdsfree(client); + server.stat_client_outbuf_limit_disconnections++; return 1; } return 0; diff --git a/src/notify.c b/src/notify.c index 2881a48dba8..2377166995c 100644 --- a/src/notify.c +++ b/src/notify.c @@ -1,30 +1,9 @@ /* - * Copyright (c) 2013, Salvatore Sanfilippo + * Copyright (c) 2013-Present, Redis Ltd. * All rights reserved. 
* - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Redis nor the names of its contributors may be used - * to endorse or promote products derived from this software without - * specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. + * Licensed under your choice of the Redis Source Available License 2.0 + * (RSALv2) or the Server Side Public License v1 (SSPLv1). */ #include "server.h" @@ -101,7 +80,7 @@ sds keyspaceEventsFlagsToString(int flags) { * 'event' is a C string representing the event name. * 'key' is a Redis object representing the key name. * 'dbid' is the database ID where the key lives. 
*/ -void notifyKeyspaceEvent(int type, char *event, robj *key, int dbid) { +void notifyKeyspaceEvent(int type, const char *event, robj *key, int dbid) { sds chan; robj *chanobj, *eventobj; int len = -1; diff --git a/src/object.c b/src/object.c index 4b3526a02c6..2b42e7b3e63 100644 --- a/src/object.c +++ b/src/object.c @@ -1,31 +1,10 @@ /* Redis Object implementation. * - * Copyright (c) 2009-2012, Salvatore Sanfilippo + * Copyright (c) 2009-Present, Redis Ltd. * All rights reserved. * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Redis nor the names of its contributors may be used - * to endorse or promote products derived from this software without - * specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. 
+ * Licensed under your choice of the Redis Source Available License 2.0 + * (RSALv2) or the Server Side Public License v1 (SSPLv1). */ #include "server.h" @@ -232,8 +211,8 @@ robj *dupStringObject(const robj *o) { } } -robj *createQuicklistObject(void) { - quicklist *l = quicklistCreate(); +robj *createQuicklistObject(int fill, int compress) { + quicklist *l = quicklistNew(fill, compress); robj *o = createObject(OBJ_LIST,l); o->encoding = OBJ_ENCODING_QUICKLIST; return o; @@ -354,17 +333,7 @@ void freeZsetObject(robj *o) { } void freeHashObject(robj *o) { - switch (o->encoding) { - case OBJ_ENCODING_HT: - dictRelease((dict*) o->ptr); - break; - case OBJ_ENCODING_LISTPACK: - lpFree(o->ptr); - break; - default: - serverPanic("Unknown hash encoding type"); - break; - } + hashTypeFree(o); } void freeModuleObject(robj *o) { @@ -523,6 +492,9 @@ void dismissHashObject(robj *o, size_t size_hint) { dismissMemory(d->ht_table[1], DICTHT_SIZE(d->ht_size_exp[1])*sizeof(dictEntry*)); } else if (o->encoding == OBJ_ENCODING_LISTPACK) { dismissMemory(o->ptr, lpBytes((unsigned char*)o->ptr)); + } else if (o->encoding == OBJ_ENCODING_LISTPACK_EX) { + listpackEx *lpt = o->ptr; + dismissMemory(lpt->lp, lpBytes((unsigned char*)lpt->lp)); } else { serverPanic("Unknown hash encoding type"); } @@ -960,6 +932,7 @@ char *strEncoding(int encoding) { case OBJ_ENCODING_HT: return "hashtable"; case OBJ_ENCODING_QUICKLIST: return "quicklist"; case OBJ_ENCODING_LISTPACK: return "listpack"; + case OBJ_ENCODING_LISTPACK_EX: return "listpackex"; case OBJ_ENCODING_INTSET: return "intset"; case OBJ_ENCODING_SKIPLIST: return "skiplist"; case OBJ_ENCODING_EMBSTR: return "embstr"; @@ -1000,7 +973,6 @@ size_t streamRadixTreeMemoryUsage(rax *rax) { * are checked and averaged to estimate the total size. */ #define OBJ_COMPUTE_SIZE_DEF_SAMPLES 5 /* Default sample size. 
*/ size_t objectComputeSize(robj *key, robj *o, size_t sample_size, int dbid) { - sds ele, ele2; dict *d; dictIterator *di; struct dictEntry *de; @@ -1035,9 +1007,9 @@ size_t objectComputeSize(robj *key, robj *o, size_t sample_size, int dbid) { if (o->encoding == OBJ_ENCODING_HT) { d = o->ptr; di = dictGetIterator(d); - asize = sizeof(*o)+sizeof(dict)+(sizeof(struct dictEntry*)*dictSlots(d)); + asize = sizeof(*o)+sizeof(dict)+(sizeof(struct dictEntry*)*dictBuckets(d)); while((de = dictNext(di)) != NULL && samples < sample_size) { - ele = dictGetKey(de); + sds ele = dictGetKey(de); elesize += dictEntryMemUsage() + sdsZmallocSize(ele); samples++; } @@ -1058,7 +1030,7 @@ size_t objectComputeSize(robj *key, robj *o, size_t sample_size, int dbid) { zskiplist *zsl = ((zset*)o->ptr)->zsl; zskiplistNode *znode = zsl->header->level[0].forward; asize = sizeof(*o)+sizeof(zset)+sizeof(zskiplist)+sizeof(dict)+ - (sizeof(struct dictEntry*)*dictSlots(d))+ + (sizeof(struct dictEntry*)*dictBuckets(d))+ zmalloc_size(zsl->header); while(znode != NULL && samples < sample_size) { elesize += sdsZmallocSize(znode->ele); @@ -1073,14 +1045,17 @@ size_t objectComputeSize(robj *key, robj *o, size_t sample_size, int dbid) { } else if (o->type == OBJ_HASH) { if (o->encoding == OBJ_ENCODING_LISTPACK) { asize = sizeof(*o)+zmalloc_size(o->ptr); + } else if (o->encoding == OBJ_ENCODING_LISTPACK_EX) { + listpackEx *lpt = o->ptr; + asize = sizeof(*o) + zmalloc_size(lpt) + zmalloc_size(lpt->lp); } else if (o->encoding == OBJ_ENCODING_HT) { d = o->ptr; di = dictGetIterator(d); - asize = sizeof(*o)+sizeof(dict)+(sizeof(struct dictEntry*)*dictSlots(d)); + asize = sizeof(*o)+sizeof(dict)+(sizeof(struct dictEntry*)*dictBuckets(d)); while((de = dictNext(di)) != NULL && samples < sample_size) { - ele = dictGetKey(de); - ele2 = dictGetVal(de); - elesize += sdsZmallocSize(ele) + sdsZmallocSize(ele2); + hfield ele = dictGetKey(de); + sds ele2 = dictGetVal(de); + elesize += hfieldZmallocSize(ele) + 
sdsZmallocSize(ele2); elesize += dictEntryMemUsage(); samples++; } @@ -1183,10 +1158,15 @@ struct redisMemOverhead *getMemoryOverheadData(void) { (float)server.cron_malloc_stats.process_rss / server.cron_malloc_stats.zmalloc_used; mh->total_frag_bytes = server.cron_malloc_stats.process_rss - server.cron_malloc_stats.zmalloc_used; - mh->allocator_frag = - (float)server.cron_malloc_stats.allocator_active / server.cron_malloc_stats.allocator_allocated; - mh->allocator_frag_bytes = - server.cron_malloc_stats.allocator_active - server.cron_malloc_stats.allocator_allocated; + /* Starting with redis 7.4, the lua memory is part of the total memory usage + * of redis, and that includes RSS and all other memory metrics. We only want + * to deduct it from active defrag. */ + size_t frag_smallbins_bytes = + server.cron_malloc_stats.allocator_frag_smallbins_bytes - server.cron_malloc_stats.lua_allocator_frag_smallbins_bytes; + size_t allocated = + server.cron_malloc_stats.allocator_allocated - server.cron_malloc_stats.lua_allocator_allocated; + mh->allocator_frag = (float)frag_smallbins_bytes / allocated + 1; + mh->allocator_frag_bytes = frag_smallbins_bytes; mh->allocator_rss = (float)server.cron_malloc_stats.allocator_resident / server.cron_malloc_stats.allocator_active; mh->allocator_rss_bytes = @@ -1246,29 +1226,31 @@ struct redisMemOverhead *getMemoryOverheadData(void) { for (j = 0; j < server.dbnum; j++) { redisDb *db = server.db+j; - long long keyscount = dictSize(db->dict); - if (keyscount==0) continue; + if (!kvstoreNumAllocatedDicts(db->keys)) continue; + + unsigned long long keyscount = kvstoreSize(db->keys); mh->total_keys += keyscount; mh->db = zrealloc(mh->db,sizeof(mh->db[0])*(mh->num_dbs+1)); mh->db[mh->num_dbs].dbid = j; - mem = dictMemUsage(db->dict) + - dictSize(db->dict) * sizeof(robj); + mem = kvstoreMemUsage(db->keys) + + keyscount * sizeof(robj); mh->db[mh->num_dbs].overhead_ht_main = mem; mem_total+=mem; - mem = dictMemUsage(db->expires); + mem = 
kvstoreMemUsage(db->expires); mh->db[mh->num_dbs].overhead_ht_expires = mem; mem_total+=mem; - /* Account for the slot to keys map in cluster mode */ - mem = dictSize(db->dict) * dictEntryMetadataSize(db->dict) + - dictMetadataSize(db->dict); - mh->db[mh->num_dbs].overhead_ht_slot_to_keys = mem; - mem_total+=mem; - mh->num_dbs++; + + mh->overhead_db_hashtable_lut += kvstoreOverheadHashtableLut(db->keys); + mh->overhead_db_hashtable_lut += kvstoreOverheadHashtableLut(db->expires); + mh->overhead_db_hashtable_rehashing += kvstoreOverheadHashtableRehashing(db->keys); + mh->overhead_db_hashtable_rehashing += kvstoreOverheadHashtableRehashing(db->expires); + mh->db_dict_rehashing_count += kvstoreDictRehashingCount(db->keys); + mh->db_dict_rehashing_count += kvstoreDictRehashingCount(db->expires); } mh->overhead_total = mem_total; @@ -1281,7 +1263,7 @@ struct redisMemOverhead *getMemoryOverheadData(void) { if (zmalloc_used > mh->startup_allocated) net_usage = zmalloc_used - mh->startup_allocated; mh->dataset_perc = (float)mh->dataset*100/net_usage; - mh->bytes_per_key = mh->total_keys ? (net_usage / mh->total_keys) : 0; + mh->bytes_per_key = mh->total_keys ? 
(mh->dataset / mh->total_keys) : 0; return mh; } @@ -1551,19 +1533,18 @@ NULL return; } } - if ((de = dictFind(c->db->dict,c->argv[2]->ptr)) == NULL) { + if ((de = dbFind(c->db, c->argv[2]->ptr)) == NULL) { addReplyNull(c); return; } size_t usage = objectComputeSize(c->argv[2],dictGetVal(de),samples,c->db->id); usage += sdsZmallocSize(dictGetKey(de)); usage += dictEntryMemUsage(); - usage += dictMetadataSize(c->db->dict); addReplyLongLong(c,usage); } else if (!strcasecmp(c->argv[1]->ptr,"stats") && c->argc == 2) { struct redisMemOverhead *mh = getMemoryOverheadData(); - addReplyMapLen(c,27+mh->num_dbs); + addReplyMapLen(c,31+mh->num_dbs); addReplyBulkCString(c,"peak.allocated"); addReplyLongLong(c,mh->peak_allocated); @@ -1599,22 +1580,27 @@ NULL char dbname[32]; snprintf(dbname,sizeof(dbname),"db.%zd",mh->db[j].dbid); addReplyBulkCString(c,dbname); - addReplyMapLen(c,3); + addReplyMapLen(c,2); addReplyBulkCString(c,"overhead.hashtable.main"); addReplyLongLong(c,mh->db[j].overhead_ht_main); addReplyBulkCString(c,"overhead.hashtable.expires"); addReplyLongLong(c,mh->db[j].overhead_ht_expires); - - addReplyBulkCString(c,"overhead.hashtable.slot-to-keys"); - addReplyLongLong(c,mh->db[j].overhead_ht_slot_to_keys); } + addReplyBulkCString(c,"overhead.db.hashtable.lut"); + addReplyLongLong(c, mh->overhead_db_hashtable_lut); + + addReplyBulkCString(c,"overhead.db.hashtable.rehashing"); + addReplyLongLong(c, mh->overhead_db_hashtable_rehashing); addReplyBulkCString(c,"overhead.total"); addReplyLongLong(c,mh->overhead_total); + addReplyBulkCString(c,"db.dict.rehashing.count"); + addReplyLongLong(c, mh->db_dict_rehashing_count); + addReplyBulkCString(c,"keys.count"); addReplyLongLong(c,mh->total_keys); @@ -1639,6 +1625,9 @@ NULL addReplyBulkCString(c,"allocator.resident"); addReplyLongLong(c,server.cron_malloc_stats.allocator_resident); + addReplyBulkCString(c,"allocator.muzzy"); + addReplyLongLong(c,server.cron_malloc_stats.allocator_muzzy); + 
addReplyBulkCString(c,"allocator-fragmentation.ratio"); addReplyDouble(c,mh->allocator_frag); diff --git a/src/pqsort.c b/src/pqsort.c index fab54e026a2..62527170573 100644 --- a/src/pqsort.c +++ b/src/pqsort.c @@ -1,7 +1,7 @@ /* The following is the NetBSD libc qsort implementation modified in order to * support partial sorting of ranges for Redis. * - * Copyright(C) 2009-2012 Salvatore Sanfilippo. All rights reserved. + * Copyright(C) 2009-current Redis Ltd. All rights reserved. * * The original copyright notice follows. */ diff --git a/src/pqsort.h b/src/pqsort.h index 824ab5c0969..621147424e7 100644 --- a/src/pqsort.h +++ b/src/pqsort.h @@ -1,32 +1,11 @@ /* The following is the NetBSD libc qsort implementation modified in order to * support partial sorting of ranges for Redis. * - * Copyright (c) 2009-2012, Salvatore Sanfilippo + * Copyright (c) 2009-Present, Redis Ltd. * All rights reserved. * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Redis nor the names of its contributors may be used - * to endorse or promote products derived from this software without - * specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. + * Licensed under your choice of the Redis Source Available License 2.0 + * (RSALv2) or the Server Side Public License v1 (SSPLv1). * * See the pqsort.c file for the original copyright notice. */ diff --git a/src/pubsub.c b/src/pubsub.c index a13c5a61fbe..25099055f6b 100644 --- a/src/pubsub.c +++ b/src/pubsub.c @@ -1,30 +1,9 @@ /* - * Copyright (c) 2009-2012, Salvatore Sanfilippo + * Copyright (c) 2009-Present, Redis Ltd. * All rights reserved. * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Redis nor the names of its contributors may be used - * to endorse or promote products derived from this software without - * specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. + * Licensed under your choice of the Redis Source Available License 2.0 + * (RSALv2) or the Server Side Public License v1 (SSPLv1). */ #include "server.h" @@ -36,7 +15,7 @@ typedef struct pubsubtype { int shard; dict *(*clientPubSubChannels)(client*); int (*subscriptionCount)(client*); - dict **serverPubSubChannels; + kvstore **serverPubSubChannels; robj **subscribeMsg; robj **unsubscribeMsg; robj **messageBulk; @@ -67,7 +46,7 @@ dict* getClientPubSubShardChannels(client *c); * If a pattern is provided, the subset of channels is returned * matching the pattern. */ -void channelList(client *c, sds pat, dict* pubsub_channels); +void channelList(client *c, sds pat, kvstore *pubsub_channels); /* * Pub/Sub type for global channels. @@ -208,15 +187,14 @@ void addReplyPubsubPatUnsubscribed(client *c, robj *pattern) { /* Return the number of pubsub channels + patterns is handled. */ int serverPubsubSubscriptionCount(void) { - return dictSize(server.pubsub_channels) + dictSize(server.pubsub_patterns); + return kvstoreSize(server.pubsub_channels) + dictSize(server.pubsub_patterns); } /* Return the number of pubsub shard level channels is handled. */ int serverPubsubShardSubscriptionCount(void) { - return dictSize(server.pubsubshard_channels); + return kvstoreSize(server.pubsubshard_channels); } - /* Return the number of channels + patterns a client is subscribed to. 
*/ int clientSubscriptionsCount(client *c) { return dictSize(c->pubsub_channels) + dictSize(c->pubsub_patterns); @@ -241,27 +219,51 @@ int clientTotalPubSubSubscriptionCount(client *c) { return clientSubscriptionsCount(c) + clientShardSubscriptionsCount(c); } +void markClientAsPubSub(client *c) { + if (!(c->flags & CLIENT_PUBSUB)) { + c->flags |= CLIENT_PUBSUB; + server.pubsub_clients++; + } +} + +void unmarkClientAsPubSub(client *c) { + if (c->flags & CLIENT_PUBSUB) { + c->flags &= ~CLIENT_PUBSUB; + server.pubsub_clients--; + } +} + /* Subscribe a client to a channel. Returns 1 if the operation succeeded, or * 0 if the client was already subscribed to that channel. */ int pubsubSubscribeChannel(client *c, robj *channel, pubsubtype type) { - dictEntry *de; - list *clients = NULL; + dictEntry *de, *existing; + dict *clients = NULL; int retval = 0; + unsigned int slot = 0; /* Add the channel to the client -> channels hash table */ - if (dictAdd(type.clientPubSubChannels(c),channel,NULL) == DICT_OK) { + void *position = dictFindPositionForInsert(type.clientPubSubChannels(c),channel,NULL); + if (position) { /* Not yet subscribed to this channel */ retval = 1; - incrRefCount(channel); /* Add the client to the channel -> list of clients hash table */ - de = dictFind(*type.serverPubSubChannels, channel); - if (de == NULL) { - clients = listCreate(); - dictAdd(*type.serverPubSubChannels, channel, clients); - incrRefCount(channel); + if (server.cluster_enabled && type.shard) { + slot = getKeySlot(channel->ptr); + } + + de = kvstoreDictAddRaw(*type.serverPubSubChannels, slot, channel, &existing); + + if (existing) { + clients = dictGetVal(existing); + channel = dictGetKey(existing); } else { - clients = dictGetVal(de); + clients = dictCreate(&clientDictType); + kvstoreDictSetVal(*type.serverPubSubChannels, slot, de, clients); + incrRefCount(channel); } - listAddNodeTail(clients,c); + + serverAssert(dictAdd(clients, c, NULL) != DICT_ERR); + 
serverAssert(dictInsertAtPosition(type.clientPubSubChannels(c), channel, position)); + incrRefCount(channel); } /* Notify the client */ addReplyPubsubSubscribed(c,channel,type); @@ -272,9 +274,9 @@ int pubsubSubscribeChannel(client *c, robj *channel, pubsubtype type) { * 0 if the client was not subscribed to the specified channel. */ int pubsubUnsubscribeChannel(client *c, robj *channel, int notify, pubsubtype type) { dictEntry *de; - list *clients; - listNode *ln; + dict *clients; int retval = 0; + int slot = 0; /* Remove the channel from the client -> channels hash table */ incrRefCount(channel); /* channel may be just a pointer to the same object @@ -282,22 +284,18 @@ int pubsubUnsubscribeChannel(client *c, robj *channel, int notify, pubsubtype ty if (dictDelete(type.clientPubSubChannels(c),channel) == DICT_OK) { retval = 1; /* Remove the client from the channel -> clients list hash table */ - de = dictFind(*type.serverPubSubChannels, channel); + if (server.cluster_enabled && type.shard) { + slot = getKeySlot(channel->ptr); + } + de = kvstoreDictFind(*type.serverPubSubChannels, slot, channel); serverAssertWithInfo(c,NULL,de != NULL); clients = dictGetVal(de); - ln = listSearchKey(clients,c); - serverAssertWithInfo(c,NULL,ln != NULL); - listDelNode(clients,ln); - if (listLength(clients) == 0) { - /* Free the list and associated hash entry at all if this was + serverAssertWithInfo(c, NULL, dictDelete(clients, c) == DICT_OK); + if (dictSize(clients) == 0) { + /* Free the dict and associated hash entry at all if this was * the latest client, so that it will be possible to abuse * Redis PUBSUB creating millions of channels. */ - dictDelete(*type.serverPubSubChannels, channel); - /* As this channel isn't subscribed by anyone, it's safe - * to remove the channel from the slot. 
*/ - if (server.cluster_enabled & type.shard) { - slotToChannelDel(channel->ptr); - } + kvstoreDictDelete(*type.serverPubSubChannels, slot, channel); } } /* Notify the client */ @@ -308,41 +306,40 @@ int pubsubUnsubscribeChannel(client *c, robj *channel, int notify, pubsubtype ty return retval; } -void pubsubShardUnsubscribeAllClients(robj *channel) { - int retval; - dictEntry *de = dictFind(server.pubsubshard_channels, channel); - serverAssertWithInfo(NULL,channel,de != NULL); - list *clients = dictGetVal(de); - if (listLength(clients) > 0) { +/* Unsubscribe all shard channels in a slot. */ +void pubsubShardUnsubscribeAllChannelsInSlot(unsigned int slot) { + if (!kvstoreDictSize(server.pubsubshard_channels, slot)) + return; + + kvstoreDictIterator *kvs_di = kvstoreGetDictSafeIterator(server.pubsubshard_channels, slot); + dictEntry *de; + while ((de = kvstoreDictIteratorNext(kvs_di)) != NULL) { + robj *channel = dictGetKey(de); + dict *clients = dictGetVal(de); /* For each client subscribed to the channel, unsubscribe it. */ - listIter li; - listNode *ln; - listRewind(clients, &li); - while ((ln = listNext(&li)) != NULL) { - client *c = listNodeValue(ln); - retval = dictDelete(c->pubsubshard_channels, channel); + dictIterator *iter = dictGetIterator(clients); + dictEntry *entry; + while ((entry = dictNext(iter)) != NULL) { + client *c = dictGetKey(entry); + int retval = dictDelete(c->pubsubshard_channels, channel); serverAssertWithInfo(c,channel,retval == DICT_OK); addReplyPubsubUnsubscribed(c, channel, pubSubShardType); /* If the client has no other pubsub subscription, * move out of pubsub mode. */ if (clientTotalPubSubSubscriptionCount(c) == 0) { - c->flags &= ~CLIENT_PUBSUB; + unmarkClientAsPubSub(c); } } + dictReleaseIterator(iter); + kvstoreDictDelete(server.pubsubshard_channels, slot, channel); } - /* Delete the channel from server pubsubshard channels hash table. 
*/ - retval = dictDelete(server.pubsubshard_channels, channel); - /* Delete the channel from slots_to_channel mapping. */ - slotToChannelDel(channel->ptr); - serverAssertWithInfo(NULL,channel,retval == DICT_OK); - decrRefCount(channel); /* it is finally safe to release it */ + kvstoreReleaseDictIterator(kvs_di); } - /* Subscribe a client to a pattern. Returns 1 if the operation succeeded, or 0 if the client was already subscribed to that pattern. */ int pubsubSubscribePattern(client *c, robj *pattern) { dictEntry *de; - list *clients; + dict *clients; int retval = 0; if (dictAdd(c->pubsub_patterns, pattern, NULL) == DICT_OK) { @@ -351,13 +348,13 @@ int pubsubSubscribePattern(client *c, robj *pattern) { /* Add the client to the pattern -> list of clients hash table */ de = dictFind(server.pubsub_patterns,pattern); if (de == NULL) { - clients = listCreate(); + clients = dictCreate(&clientDictType); dictAdd(server.pubsub_patterns,pattern,clients); incrRefCount(pattern); } else { clients = dictGetVal(de); } - listAddNodeTail(clients,c); + serverAssert(dictAdd(clients, c, NULL) != DICT_ERR); } /* Notify the client */ addReplyPubsubPatSubscribed(c,pattern); @@ -368,8 +365,7 @@ int pubsubSubscribePattern(client *c, robj *pattern) { * 0 if the client was not subscribed to the specified channel. */ int pubsubUnsubscribePattern(client *c, robj *pattern, int notify) { dictEntry *de; - list *clients; - listNode *ln; + dict *clients; int retval = 0; incrRefCount(pattern); /* Protect the object. 
May be the same we remove */ @@ -379,11 +375,9 @@ int pubsubUnsubscribePattern(client *c, robj *pattern, int notify) { de = dictFind(server.pubsub_patterns,pattern); serverAssertWithInfo(c,NULL,de != NULL); clients = dictGetVal(de); - ln = listSearchKey(clients,c); - serverAssertWithInfo(c,NULL,ln != NULL); - listDelNode(clients,ln); - if (listLength(clients) == 0) { - /* Free the list and associated hash entry at all if this was + serverAssertWithInfo(c, NULL, dictDelete(clients, c) == DICT_OK); + if (dictSize(clients) == 0) { + /* Free the dict and associated hash entry at all if this was * the latest client. */ dictDelete(server.pubsub_patterns,pattern); } @@ -432,17 +426,6 @@ int pubsubUnsubscribeShardAllChannels(client *c, int notify) { return count; } -/* - * Unsubscribe a client from provided shard subscribed channel(s). - */ -void pubsubUnsubscribeShardChannels(robj **channels, unsigned int count) { - for (unsigned int j = 0; j < count; j++) { - /* Remove the channel from server and from the clients - * subscribed to it as well as notify them. */ - pubsubShardUnsubscribeAllClients(channels[j]); - } -} - /* Unsubscribe from all the patterns. Return the number of patterns the * client was subscribed from. 
*/ int pubsubUnsubscribeAllPatterns(client *c, int notify) { @@ -471,23 +454,24 @@ int pubsubPublishMessageInternal(robj *channel, robj *message, pubsubtype type) int receivers = 0; dictEntry *de; dictIterator *di; - listNode *ln; - listIter li; + unsigned int slot = 0; /* Send to clients listening for that channel */ - de = dictFind(*type.serverPubSubChannels, channel); + if (server.cluster_enabled && type.shard) { + slot = keyHashSlot(channel->ptr, sdslen(channel->ptr)); + } + de = kvstoreDictFind(*type.serverPubSubChannels, slot, channel); if (de) { - list *list = dictGetVal(de); - listNode *ln; - listIter li; - - listRewind(list,&li); - while ((ln = listNext(&li)) != NULL) { - client *c = ln->value; + dict *clients = dictGetVal(de); + dictEntry *entry; + dictIterator *iter = dictGetIterator(clients); + while ((entry = dictNext(iter)) != NULL) { + client *c = dictGetKey(entry); addReplyPubsubMessage(c,channel,message,*type.messageBulk); updateClientMemUsageAndBucket(c); receivers++; } + dictReleaseIterator(iter); } if (type.shard) { @@ -501,19 +485,21 @@ int pubsubPublishMessageInternal(robj *channel, robj *message, pubsubtype type) channel = getDecodedObject(channel); while((de = dictNext(di)) != NULL) { robj *pattern = dictGetKey(de); - list *clients = dictGetVal(de); + dict *clients = dictGetVal(de); if (!stringmatchlen((char*)pattern->ptr, sdslen(pattern->ptr), (char*)channel->ptr, sdslen(channel->ptr),0)) continue; - listRewind(clients,&li); - while ((ln = listNext(&li)) != NULL) { - client *c = listNodeValue(ln); + dictEntry *entry; + dictIterator *iter = dictGetIterator(clients); + while ((entry = dictNext(iter)) != NULL) { + client *c = dictGetKey(entry); addReplyPubsubPatMessage(c,pattern,channel,message); updateClientMemUsageAndBucket(c); receivers++; } + dictReleaseIterator(iter); } decrRefCount(channel); dictReleaseIterator(di); @@ -546,7 +532,7 @@ void subscribeCommand(client *c) { } for (j = 1; j < c->argc; j++) 
pubsubSubscribeChannel(c,c->argv[j],pubSubType); - c->flags |= CLIENT_PUBSUB; + markClientAsPubSub(c); } /* UNSUBSCRIBE [channel ...] */ @@ -559,7 +545,9 @@ void unsubscribeCommand(client *c) { for (j = 1; j < c->argc; j++) pubsubUnsubscribeChannel(c,c->argv[j],1,pubSubType); } - if (clientTotalPubSubSubscriptionCount(c) == 0) c->flags &= ~CLIENT_PUBSUB; + if (clientTotalPubSubSubscriptionCount(c) == 0) { + unmarkClientAsPubSub(c); + } } /* PSUBSCRIBE pattern [pattern ...] */ @@ -579,7 +567,7 @@ void psubscribeCommand(client *c) { for (j = 1; j < c->argc; j++) pubsubSubscribePattern(c,c->argv[j]); - c->flags |= CLIENT_PUBSUB; + markClientAsPubSub(c); } /* PUNSUBSCRIBE [pattern [pattern ...]] */ @@ -592,7 +580,9 @@ void punsubscribeCommand(client *c) { for (j = 1; j < c->argc; j++) pubsubUnsubscribePattern(c,c->argv[j],1); } - if (clientTotalPubSubSubscriptionCount(c) == 0) c->flags &= ~CLIENT_PUBSUB; + if (clientTotalPubSubSubscriptionCount(c) == 0) { + unmarkClientAsPubSub(c); + } } /* This function wraps pubsubPublishMessage and also propagates the message to cluster. @@ -647,10 +637,10 @@ NULL addReplyArrayLen(c,(c->argc-2)*2); for (j = 2; j < c->argc; j++) { - list *l = dictFetchValue(server.pubsub_channels,c->argv[j]); + dict *d = kvstoreDictFetchValue(server.pubsub_channels, 0, c->argv[j]); addReplyBulk(c,c->argv[j]); - addReplyLongLong(c,l ? listLength(l) : 0); + addReplyLongLong(c, d ? dictSize(d) : 0); } } else if (!strcasecmp(c->argv[1]->ptr,"numpat") && c->argc == 2) { /* PUBSUB NUMPAT */ @@ -664,38 +654,43 @@ NULL } else if (!strcasecmp(c->argv[1]->ptr,"shardnumsub") && c->argc >= 2) { /* PUBSUB SHARDNUMSUB [ShardChannel_1 ... 
ShardChannel_N] */ int j; - addReplyArrayLen(c, (c->argc-2)*2); for (j = 2; j < c->argc; j++) { - list *l = dictFetchValue(server.pubsubshard_channels, c->argv[j]); + unsigned int slot = calculateKeySlot(c->argv[j]->ptr); + dict *clients = kvstoreDictFetchValue(server.pubsubshard_channels, slot, c->argv[j]); addReplyBulk(c,c->argv[j]); - addReplyLongLong(c,l ? listLength(l) : 0); + addReplyLongLong(c, clients ? dictSize(clients) : 0); } } else { addReplySubcommandSyntaxError(c); } } -void channelList(client *c, sds pat, dict *pubsub_channels) { - dictIterator *di = dictGetIterator(pubsub_channels); - dictEntry *de; +void channelList(client *c, sds pat, kvstore *pubsub_channels) { long mblen = 0; void *replylen; + unsigned int slot_cnt = kvstoreNumDicts(pubsub_channels); replylen = addReplyDeferredLen(c); - while((de = dictNext(di)) != NULL) { - robj *cobj = dictGetKey(de); - sds channel = cobj->ptr; - - if (!pat || stringmatchlen(pat, sdslen(pat), - channel, sdslen(channel),0)) - { - addReplyBulk(c,cobj); - mblen++; + for (unsigned int i = 0; i < slot_cnt; i++) { + if (!kvstoreDictSize(pubsub_channels, i)) + continue; + kvstoreDictIterator *kvs_di = kvstoreGetDictIterator(pubsub_channels, i); + dictEntry *de; + while((de = kvstoreDictIteratorNext(kvs_di)) != NULL) { + robj *cobj = dictGetKey(de); + sds channel = cobj->ptr; + + if (!pat || stringmatchlen(pat, sdslen(pat), + channel, sdslen(channel),0)) + { + addReplyBulk(c,cobj); + mblen++; + } } + kvstoreReleaseDictIterator(kvs_di); } - dictReleaseIterator(di); setDeferredArrayLen(c,replylen,mblen); } @@ -717,20 +712,11 @@ void ssubscribeCommand(client *c) { } for (int j = 1; j < c->argc; j++) { - /* A channel is only considered to be added, if a - * subscriber exists for it. And if a subscriber - * already exists the slotToChannel doesn't needs - * to be incremented. 
*/ - if (server.cluster_enabled & - (dictFind(*pubSubShardType.serverPubSubChannels, c->argv[j]) == NULL)) { - slotToChannelAdd(c->argv[j]->ptr); - } pubsubSubscribeChannel(c, c->argv[j], pubSubShardType); } - c->flags |= CLIENT_PUBSUB; + markClientAsPubSub(c); } - /* SUNSUBSCRIBE [shardchannel [shardchannel ...]] */ void sunsubscribeCommand(client *c) { if (c->argc == 1) { @@ -740,7 +726,9 @@ void sunsubscribeCommand(client *c) { pubsubUnsubscribeChannel(c, c->argv[j], 1, pubSubShardType); } } - if (clientTotalPubSubSubscriptionCount(c) == 0) c->flags &= ~CLIENT_PUBSUB; + if (clientTotalPubSubSubscriptionCount(c) == 0) { + unmarkClientAsPubSub(c); + } } size_t pubsubMemOverhead(client *c) { @@ -752,3 +740,9 @@ size_t pubsubMemOverhead(client *c) { mem += dictMemUsage(c->pubsubshard_channels); return mem; } + +int pubsubTotalSubscriptions(void) { + return dictSize(server.pubsub_patterns) + + kvstoreSize(server.pubsub_channels) + + kvstoreSize(server.pubsubshard_channels); +} diff --git a/src/quicklist.c b/src/quicklist.c index 301a2166ee7..7fe3430fced 100644 --- a/src/quicklist.c +++ b/src/quicklist.c @@ -48,18 +48,14 @@ * just one byte, it still won't overflow the 16 bit count field. */ static const size_t optimization_level[] = {4096, 8192, 16384, 32768, 65536}; -/* packed_threshold is initialized to 1gb*/ -static size_t packed_threshold = (1 << 30); +/* This is for test suite development purposes only, 0 means disabled. 
*/ +static size_t packed_threshold = 0; -/* set threshold for PLAIN nodes, the real limit is 4gb */ -#define isLargeElement(size) ((size) >= packed_threshold) - -int quicklistisSetPackedThreshold(size_t sz) { +/* set threshold for PLAIN nodes for test suite, the real limit is based on `fill` */ +int quicklistSetPackedThreshold(size_t sz) { /* Don't allow threshold to be set above or even slightly below 4GB */ if (sz > (1ull<<32) - (1<<20)) { return 0; - } else if (sz == 0) { /* 0 means restore threshold */ - sz = (1 << 30); } packed_threshold = sz; return 1; @@ -104,6 +100,9 @@ quicklistBookmark *_quicklistBookmarkFindByName(quicklist *ql, const char *name) quicklistBookmark *_quicklistBookmarkFindByNode(quicklist *ql, quicklistNode *node); void _quicklistBookmarkDelete(quicklist *ql, quicklistBookmark *bm); +REDIS_STATIC quicklistNode *_quicklistSplitNode(quicklistNode *node, int offset, int after); +REDIS_STATIC quicklistNode *_quicklistMergeNodes(quicklist *quicklist, quicklistNode *center); + /* Simple way to give quicklistEntry structs default values with one call. */ #define initEntry(e) \ do { \ @@ -158,9 +157,9 @@ void quicklistSetFill(quicklist *quicklist, int fill) { quicklist->fill = fill; } -void quicklistSetOptions(quicklist *quicklist, int fill, int depth) { +void quicklistSetOptions(quicklist *quicklist, int fill, int compress) { quicklistSetFill(quicklist, fill); - quicklistSetCompressDepth(quicklist, depth); + quicklistSetCompressDepth(quicklist, compress); } /* Create a new quicklist with some default parameters. */ @@ -378,6 +377,15 @@ REDIS_STATIC void __quicklistCompress(const quicklist *quicklist, quicklistCompressNode(reverse); } +/* This macro is used to compress a node. + * + * If the 'recompress' flag of the node is true, we compress it directly without + * checking whether it is within the range of compress depth. 
+ * However, it's important to ensure that the 'recompress' flag of head and tail + * is always false, as we always assume that head and tail are not compressed. + * + * If the 'recompress' flag of the node is false, we check whether the node is + * within the range of compress depth before compressing it. */ #define quicklistCompress(_ql, _node) \ do { \ if ((_node)->recompress) \ @@ -450,6 +458,15 @@ REDIS_STATIC void _quicklistInsertNodeAfter(quicklist *quicklist, #define sizeMeetsSafetyLimit(sz) ((sz) <= SIZE_SAFETY_LIMIT) +/* Calculate the size limit of the quicklist node based on negative 'fill'. */ +static size_t quicklistNodeNegFillLimit(int fill) { + assert(fill < 0); + size_t offset = (-fill) - 1; + size_t max_level = sizeof(optimization_level) / sizeof(*optimization_level); + if (offset >= max_level) offset = max_level - 1; + return optimization_level[offset]; +} + /* Calculate the size limit or length limit of the quicklist node * based on 'fill', and is also used to limit list listpack. */ void quicklistNodeLimit(int fill, size_t *size, unsigned int *count) { @@ -460,10 +477,7 @@ void quicklistNodeLimit(int fill, size_t *size, unsigned int *count) { /* Ensure that one node have at least one entry */ *count = (fill == 0) ? 1 : fill; } else { - size_t offset = (-fill) - 1; - size_t max_level = sizeof(optimization_level) / sizeof(*optimization_level); - if (offset >= max_level) offset = max_level - 1; - *size = optimization_level[offset]; + *size = quicklistNodeNegFillLimit(fill); } } @@ -488,12 +502,23 @@ int quicklistNodeExceedsLimit(int fill, size_t new_sz, unsigned int new_count) { redis_unreachable(); } +/* Determines whether a given size qualifies as a large element based on a threshold + * determined by the 'fill'. If the size is considered large, it will be stored in + * a plain node. 
*/ +static int isLargeElement(size_t sz, int fill) { + if (unlikely(packed_threshold != 0)) return sz >= packed_threshold; + if (fill >= 0) + return !sizeMeetsSafetyLimit(sz); + else + return sz > quicklistNodeNegFillLimit(fill); +} + REDIS_STATIC int _quicklistNodeAllowInsert(const quicklistNode *node, const int fill, const size_t sz) { if (unlikely(!node)) return 0; - if (unlikely(QL_NODE_IS_PLAIN(node) || isLargeElement(sz))) + if (unlikely(QL_NODE_IS_PLAIN(node) || isLargeElement(sz, fill))) return 0; /* Estimate how many bytes will be added to the listpack by this one entry. @@ -529,19 +554,25 @@ REDIS_STATIC int _quicklistNodeAllowMerge(const quicklistNode *a, (node)->sz = lpBytes((node)->entry); \ } while (0) -static quicklistNode* __quicklistCreatePlainNode(void *value, size_t sz) { +static quicklistNode* __quicklistCreateNode(int container, void *value, size_t sz) { quicklistNode *new_node = quicklistCreateNode(); - new_node->entry = zmalloc(sz); - new_node->container = QUICKLIST_NODE_CONTAINER_PLAIN; - memcpy(new_node->entry, value, sz); + new_node->container = container; + if (container == QUICKLIST_NODE_CONTAINER_PLAIN) { + new_node->entry = zmalloc(sz); + memcpy(new_node->entry, value, sz); + } else { + new_node->entry = lpPrepend(lpNew(0), value, sz); + } new_node->sz = sz; new_node->count++; return new_node; } static void __quicklistInsertPlainNode(quicklist *quicklist, quicklistNode *old_node, - void *value, size_t sz, int after) { - __quicklistInsertNode(quicklist, old_node, __quicklistCreatePlainNode(value, sz), after); + void *value, size_t sz, int after) +{ + quicklistNode *new_node = __quicklistCreateNode(QUICKLIST_NODE_CONTAINER_PLAIN, value, sz); + __quicklistInsertNode(quicklist, old_node, new_node, after); quicklist->count++; } @@ -552,7 +583,7 @@ static void __quicklistInsertPlainNode(quicklist *quicklist, quicklistNode *old_ int quicklistPushHead(quicklist *quicklist, void *value, size_t sz) { quicklistNode *orig_head = quicklist->head; - 
if (unlikely(isLargeElement(sz))) { + if (unlikely(isLargeElement(sz, quicklist->fill))) { __quicklistInsertPlainNode(quicklist, quicklist->head, value, sz, 0); return 1; } @@ -579,7 +610,7 @@ int quicklistPushHead(quicklist *quicklist, void *value, size_t sz) { * Returns 1 if new tail created. */ int quicklistPushTail(quicklist *quicklist, void *value, size_t sz) { quicklistNode *orig_tail = quicklist->tail; - if (unlikely(isLargeElement(sz))) { + if (unlikely(isLargeElement(sz, quicklist->fill))) { __quicklistInsertPlainNode(quicklist, quicklist->tail, value, sz, 1); return 1; } @@ -741,14 +772,18 @@ void quicklistReplaceEntry(quicklistIter *iter, quicklistEntry *entry, void *data, size_t sz) { quicklist* quicklist = iter->quicklist; + quicklistNode *node = entry->node; + unsigned char *newentry; - if (likely(!QL_NODE_IS_PLAIN(entry->node) && !isLargeElement(sz))) { - entry->node->entry = lpReplace(entry->node->entry, &entry->zi, data, sz); + if (likely(!QL_NODE_IS_PLAIN(entry->node) && !isLargeElement(sz, quicklist->fill) && + (newentry = lpReplace(entry->node->entry, &entry->zi, data, sz)) != NULL)) + { + entry->node->entry = newentry; quicklistNodeUpdateSz(entry->node); /* quicklistNext() and quicklistGetIteratorEntryAtIdx() provide an uncompressed node */ quicklistCompress(quicklist, entry->node); } else if (QL_NODE_IS_PLAIN(entry->node)) { - if (isLargeElement(sz)) { + if (isLargeElement(sz, quicklist->fill)) { zfree(entry->node->entry); entry->node->entry = zmalloc(sz); entry->node->sz = sz; @@ -758,17 +793,37 @@ void quicklistReplaceEntry(quicklistIter *iter, quicklistEntry *entry, quicklistInsertAfter(iter, entry, data, sz); __quicklistDelNode(quicklist, entry->node); } - } else { - entry->node->dont_compress = 1; /* Prevent compression in quicklistInsertAfter() */ - quicklistInsertAfter(iter, entry, data, sz); + } else { /* The node is full or data is a large element */ + quicklistNode *split_node = NULL, *new_node; + node->dont_compress = 1; /* Prevent 
compression in __quicklistInsertNode() */ + + /* If the entry is not at the tail, split the node at the entry's offset. */ + if (entry->offset != node->count - 1 && entry->offset != -1) + split_node = _quicklistSplitNode(node, entry->offset, 1); + + /* Create a new node and insert it after the original node. + * If the original node was split, insert the split node after the new node. */ + new_node = __quicklistCreateNode(isLargeElement(sz, quicklist->fill) ? + QUICKLIST_NODE_CONTAINER_PLAIN : QUICKLIST_NODE_CONTAINER_PACKED, data, sz); + __quicklistInsertNode(quicklist, node, new_node, 1); + if (split_node) __quicklistInsertNode(quicklist, new_node, split_node, 1); + quicklist->count++; + + /* Delete the replaced element. */ if (entry->node->count == 1) { __quicklistDelNode(quicklist, entry->node); } else { unsigned char *p = lpSeek(entry->node->entry, -1); quicklistDelIndex(quicklist, entry->node, &p); entry->node->dont_compress = 0; /* Re-enable compression */ - quicklistCompress(quicklist, entry->node); - quicklistCompress(quicklist, entry->node->next); + new_node = _quicklistMergeNodes(quicklist, new_node); + /* We can't know if the current node and its sibling nodes are correctly compressed, + * and we don't know if they are within the range of compress depth, so we need to + * use quicklistCompress() for compression, which checks if node is within compress + * depth before compressing. */ + quicklistCompress(quicklist, new_node); + quicklistCompress(quicklist, new_node->prev); + if (new_node->next) quicklistCompress(quicklist, new_node->next); } } @@ -826,6 +881,8 @@ REDIS_STATIC quicklistNode *_quicklistListpackMerge(quicklist *quicklist, } keep->count = lpLength(keep->entry); quicklistNodeUpdateSz(keep); + keep->recompress = 0; /* Prevent 'keep' from being recompressed if + * it becomes head or tail after merging. 
*/ nokeep->count = 0; __quicklistDelNode(quicklist, nokeep); @@ -844,9 +901,10 @@ REDIS_STATIC quicklistNode *_quicklistListpackMerge(quicklist *quicklist, * - (center->next, center->next->next) * - (center->prev, center) * - (center, center->next) + * + * Returns the new 'center' after merging. */ -REDIS_STATIC void _quicklistMergeNodes(quicklist *quicklist, - quicklistNode *center) { +REDIS_STATIC quicklistNode *_quicklistMergeNodes(quicklist *quicklist, quicklistNode *center) { int fill = quicklist->fill; quicklistNode *prev, *prev_prev, *next, *next_next, *target; prev = prev_prev = next = next_next = target = NULL; @@ -886,8 +944,9 @@ REDIS_STATIC void _quicklistMergeNodes(quicklist *quicklist, /* Use result of center merge (or original) to merge with next node. */ if (_quicklistNodeAllowMerge(target, target->next, fill)) { - _quicklistListpackMerge(quicklist, target, target->next); + target = _quicklistListpackMerge(quicklist, target, target->next); } + return target; } /* Split 'node' into two parts, parameterized by 'offset' and 'after'. 
@@ -959,7 +1018,7 @@ REDIS_STATIC void _quicklistInsert(quicklistIter *iter, quicklistEntry *entry, if (!node) { /* we have no reference node, so let's create only node in the list */ D("No node given!"); - if (unlikely(isLargeElement(sz))) { + if (unlikely(isLargeElement(sz, quicklist->fill))) { __quicklistInsertPlainNode(quicklist, quicklist->tail, value, sz, after); return; } @@ -996,13 +1055,13 @@ REDIS_STATIC void _quicklistInsert(quicklistIter *iter, quicklistEntry *entry, } } - if (unlikely(isLargeElement(sz))) { + if (unlikely(isLargeElement(sz, quicklist->fill))) { if (QL_NODE_IS_PLAIN(node) || (at_tail && after) || (at_head && !after)) { __quicklistInsertPlainNode(quicklist, node, value, sz, after); } else { quicklistDecompressNodeForUse(node); new_node = _quicklistSplitNode(node, entry->offset, after); - quicklistNode *entry_node = __quicklistCreatePlainNode(value, sz); + quicklistNode *entry_node = __quicklistCreateNode(QUICKLIST_NODE_CONTAINER_PLAIN, value, sz); __quicklistInsertNode(quicklist, node, entry_node, after); __quicklistInsertNode(quicklist, entry_node, new_node, after); quicklist->count++; @@ -2061,20 +2120,23 @@ int quicklistTest(int argc, char *argv[], int flags) { } TEST("Comprassion Plain node") { - char buf[256]; - quicklistisSetPackedThreshold(1); - quicklist *ql = quicklistNew(-2, 1); + for (int f = 0; f < fill_count; f++) { + size_t large_limit = (fills[f] < 0) ? quicklistNodeNegFillLimit(fills[f]) + 1 : SIZE_SAFETY_LIMIT + 1; + + char buf[large_limit]; + quicklist *ql = quicklistNew(fills[f], 1); for (int i = 0; i < 500; i++) { /* Set to 256 to allow the node to be triggered to compress, * if it is less than 48(nocompress), the test will be successful. 
*/ snprintf(buf, sizeof(buf), "hello%d", i); - quicklistPushHead(ql, buf, 256); + quicklistPushHead(ql, buf, large_limit); } quicklistIter *iter = quicklistGetIterator(ql, AL_START_TAIL); quicklistEntry entry; int i = 0; while (quicklistNext(iter, &entry)) { + assert(QL_NODE_IS_PLAIN(entry.node)); snprintf(buf, sizeof(buf), "hello%d", i); if (strcmp((char *)entry.value, buf)) ERR("value [%s] didn't match [%s] at position %d", @@ -2084,42 +2146,57 @@ int quicklistTest(int argc, char *argv[], int flags) { ql_release_iterator(iter); quicklistRelease(ql); } + } - TEST("NEXT plain node") - { - packed_threshold = 3; - quicklist *ql = quicklistNew(-2, options[_i]); - char *strings[] = {"hello1", "hello2", "h3", "h4", "hello5"}; + TEST("NEXT plain node") { + for (int f = 0; f < fill_count; f++) { + size_t large_limit = (fills[f] < 0) ? quicklistNodeNegFillLimit(fills[f]) + 1 : SIZE_SAFETY_LIMIT + 1; + quicklist *ql = quicklistNew(fills[f], options[_i]); - for (int i = 0; i < 5; ++i) - quicklistPushHead(ql, strings[i], strlen(strings[i])); + char buf[large_limit]; + memcpy(buf, "plain", 5); + quicklistPushHead(ql, buf, large_limit); + quicklistPushHead(ql, buf, large_limit); + quicklistPushHead(ql, "packed3", 7); + quicklistPushHead(ql, "packed4", 7); + quicklistPushHead(ql, buf, large_limit); quicklistEntry entry; quicklistIter *iter = quicklistGetIterator(ql, AL_START_TAIL); - int j = 0; while(quicklistNext(iter, &entry) != 0) { - assert(strncmp(strings[j], (char *)entry.value, strlen(strings[j])) == 0); - j++; + if (QL_NODE_IS_PLAIN(entry.node)) + assert(!memcmp(entry.value, "plain", 5)); + else + assert(!memcmp(entry.value, "packed", 6)); } ql_release_iterator(iter); quicklistRelease(ql); } + } TEST("rotate plain node ") { + for (int f = 0; f < fill_count; f++) { + size_t large_limit = (fills[f] < 0) ? 
quicklistNodeNegFillLimit(fills[f]) + 1 : SIZE_SAFETY_LIMIT + 1; + unsigned char *data = NULL; size_t sz; long long lv; int i =0; - packed_threshold = 5; - quicklist *ql = quicklistNew(-2, options[_i]); - quicklistPushHead(ql, "hello1", 6); - quicklistPushHead(ql, "hello4", 6); - quicklistPushHead(ql, "hello3", 6); - quicklistPushHead(ql, "hello2", 6); + quicklist *ql = quicklistNew(fills[f], options[_i]); + char buf[large_limit]; + memcpy(buf, "hello1", 6); + quicklistPushHead(ql, buf, large_limit); + memcpy(buf, "hello4", 6); + quicklistPushHead(ql, buf, large_limit); + memcpy(buf, "hello3", 6); + quicklistPushHead(ql, buf, large_limit); + memcpy(buf, "hello2", 6); + quicklistPushHead(ql, buf, large_limit); quicklistRotate(ql); for(i = 1 ; i < 5; i++) { + assert(QL_NODE_IS_PLAIN(ql->tail)); quicklistPop(ql, QUICKLIST_HEAD, &data, &sz, &lv); int temp_char = data[5]; zfree(data); @@ -2128,7 +2205,7 @@ int quicklistTest(int argc, char *argv[], int flags) { ql_verify(ql, 0, 0, 0, 0); quicklistRelease(ql); - packed_threshold = (1 << 30); + } } TEST("rotate one val once") { @@ -3224,7 +3301,7 @@ int quicklistTest(int argc, char *argv[], int flags) { memcpy(s, "helloworld", 10); memcpy(s + sz - 10, "1234567890", 10); - quicklistNode *node = __quicklistCreatePlainNode(s, sz); + quicklistNode *node = __quicklistCreateNode(QUICKLIST_NODE_CONTAINER_PLAIN, s, sz); /* Just to avoid triggering the assertion in __quicklistCompressNode(), * it disables the passing of quicklist head or tail node. */ diff --git a/src/quicklist.h b/src/quicklist.h index f17834b9943..c4b07e0c009 100644 --- a/src/quicklist.h +++ b/src/quicklist.h @@ -42,7 +42,8 @@ * container: 2 bits, PLAIN=1 (a single item as char array), PACKED=2 (listpack with multiple items). * recompress: 1 bit, bool, true if node is temporary decompressed for usage. * attempted_compress: 1 bit, boolean, used for verifying during testing. 
- * extra: 10 bits, free for future use; pads out the remainder of 32 bits */ + * dont_compress: 1 bit, boolean, used for preventing compression of entry. + * extra: 9 bits, free for future use; pads out the remainder of 32 bits */ typedef struct quicklistNode { struct quicklistNode *prev; struct quicklistNode *next; @@ -154,9 +155,9 @@ typedef struct quicklistEntry { /* Prototypes */ quicklist *quicklistCreate(void); quicklist *quicklistNew(int fill, int compress); -void quicklistSetCompressDepth(quicklist *quicklist, int depth); +void quicklistSetCompressDepth(quicklist *quicklist, int compress); void quicklistSetFill(quicklist *quicklist, int fill); -void quicklistSetOptions(quicklist *quicklist, int fill, int depth); +void quicklistSetOptions(quicklist *quicklist, int fill, int compress); void quicklistRelease(quicklist *quicklist); int quicklistPushHead(quicklist *quicklist, void *value, const size_t sz); int quicklistPushTail(quicklist *quicklist, void *value, const size_t sz); @@ -201,7 +202,7 @@ int quicklistBookmarkCreate(quicklist **ql_ref, const char *name, quicklistNode int quicklistBookmarkDelete(quicklist *ql, const char *name); quicklistNode *quicklistBookmarkFind(quicklist *ql, const char *name); void quicklistBookmarksClear(quicklist *ql); -int quicklistisSetPackedThreshold(size_t sz); +int quicklistSetPackedThreshold(size_t sz); #ifdef REDIS_TEST int quicklistTest(int argc, char *argv[], int flags); diff --git a/src/rand.c b/src/rand.c index e1e98e63b99..6256c3bc3c2 100644 --- a/src/rand.c +++ b/src/rand.c @@ -13,7 +13,7 @@ * * ---------------------------------------------------------------------------- * - * Copyright (c) 2010-2012, Salvatore Sanfilippo + * Copyright (c) 2010-current, Redis Ltd. * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without diff --git a/src/rand.h b/src/rand.h index 9884915a97d..ccacbf28a45 100644 --- a/src/rand.h +++ b/src/rand.h @@ -1,30 +1,9 @@ /* - * Copyright (c) 2009-2012, Salvatore Sanfilippo + * Copyright (c) 2009-Present, Redis Ltd. * All rights reserved. * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Redis nor the names of its contributors may be used - * to endorse or promote products derived from this software without - * specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. + * Licensed under your choice of the Redis Source Available License 2.0 + * (RSALv2) or the Server Side Public License v1 (SSPLv1). 
*/ #ifndef REDIS_RANDOM_H diff --git a/src/rax.c b/src/rax.c index 287f9855d5b..491e50aa05d 100644 --- a/src/rax.c +++ b/src/rax.c @@ -1,42 +1,19 @@ /* Rax -- A radix tree implementation. * - * Version 1.2 -- 7 February 2019 - * - * Copyright (c) 2017-2019, Salvatore Sanfilippo + * Copyright (c) 2017-Present, Redis Ltd. * All rights reserved. * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Redis nor the names of its contributors may be used - * to endorse or promote products derived from this software without - * specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. + * Licensed under your choice of the Redis Source Available License 2.0 + * (RSALv2) or the Server Side Public License v1 (SSPLv1). 
*/ #include #include -#include #include #include #include #include "rax.h" +#include "redisassert.h" #ifndef RAX_MALLOC_INCLUDE #define RAX_MALLOC_INCLUDE "rax_malloc.h" @@ -44,11 +21,6 @@ #include RAX_MALLOC_INCLUDE -/* This is a special pointer that is guaranteed to never have the same value - * of a radix tree node. It's used in order to report "not found" error without - * requiring the function to have multiple return values. */ -void *raxNotFound = (void*)"rax-not-found-pointer"; - /* -------------------------------- Debugging ------------------------------ */ void raxDebugShowNode(const char *msg, raxNode *n); @@ -201,11 +173,16 @@ raxNode *raxNewNode(size_t children, int datafield) { /* Allocate a new rax and return its pointer. On out of memory the function * returns NULL. */ rax *raxNew(void) { - rax *rax = rax_malloc(sizeof(*rax)); + return raxNewWithMetadata(0); +} + +/* Allocate a new rax with metadata */ +rax *raxNewWithMetadata(int metaSize) { + rax *rax = rax_malloc(sizeof(*rax) + metaSize); if (rax == NULL) return NULL; rax->numele = 0; rax->numnodes = 1; - rax->head = raxNewNode(0,0); + rax->head = raxNewNode(0, 0); if (rax->head == NULL) { rax_free(rax); return NULL; @@ -912,18 +889,19 @@ int raxTryInsert(rax *rax, unsigned char *s, size_t len, void *data, void **old) return raxGenericInsert(rax,s,len,data,old,0); } -/* Find a key in the rax, returns raxNotFound special void pointer value - * if the item was not found, otherwise the value associated with the - * item is returned. */ -void *raxFind(rax *rax, unsigned char *s, size_t len) { +/* Find a key in the rax: return 1 if the item is found, 0 otherwise. + * If there is an item and 'value' is passed in a non-NULL pointer, + * the value associated with the item is set at that address. 
*/ +int raxFind(rax *rax, unsigned char *s, size_t len, void **value) { raxNode *h; debugf("### Lookup: %.*s\n", (int)len, s); int splitpos = 0; size_t i = raxLowWalk(rax,s,len,&h,NULL,&splitpos,NULL); if (i != len || (h->iscompr && splitpos != 0) || !h->iskey) - return raxNotFound; - return raxGetData(h); + return 0; + if (value != NULL) *value = raxGetData(h); + return 1; } /* Return the memory address where the 'parent' node stores the specified @@ -1237,6 +1215,25 @@ void raxRecursiveFree(rax *rax, raxNode *n, void (*free_callback)(void*)) { rax->numnodes--; } +/* Same as raxRecursiveFree() with context argument */ +void raxRecursiveFreeWithCtx(rax *rax, raxNode *n, + void (*free_callback)(void *item, void *ctx), void *ctx) { + debugnode("free traversing",n); + int numchildren = n->iscompr ? 1 : n->size; + raxNode **cp = raxNodeLastChildPtr(n); + while(numchildren--) { + raxNode *child; + memcpy(&child,cp,sizeof(child)); + raxRecursiveFreeWithCtx(rax,child,free_callback, ctx); + cp--; + } + debugnode("free depth-first",n); + if (free_callback && n->iskey && !n->isnull) + free_callback(raxGetData(n), ctx); + rax_free(n); + rax->numnodes--; +} + /* Free a whole radix tree, calling the specified callback in order to * free the auxiliary data. */ void raxFreeWithCallback(rax *rax, void (*free_callback)(void*)) { @@ -1245,6 +1242,15 @@ void raxFreeWithCallback(rax *rax, void (*free_callback)(void*)) { rax_free(rax); } +/* Free a whole radix tree, calling the specified callback in order to + * free the auxiliary data. */ +void raxFreeWithCbAndContext(rax *rax, + void (*free_callback)(void *item, void *ctx), void *ctx) { + raxRecursiveFreeWithCtx(rax,rax->head,free_callback,ctx); + assert(rax->numnodes == 0); + rax_free(rax); +} + /* Free a whole radix tree. 
*/ void raxFree(rax *rax) { raxFreeWithCallback(rax,NULL); diff --git a/src/rax.h b/src/rax.h index 6b1fd4188cc..74963acaddb 100644 --- a/src/rax.h +++ b/src/rax.h @@ -1,31 +1,10 @@ /* Rax -- A radix tree implementation. * - * Copyright (c) 2017-2018, Salvatore Sanfilippo + * Copyright (c) 2017-Present, Redis Ltd. * All rights reserved. * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Redis nor the names of its contributors may be used - * to endorse or promote products derived from this software without - * specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. + * Licensed under your choice of the Redis Source Available License 2.0 + * (RSALv2) or the Server Side Public License v1 (SSPLv1). 
*/ #ifndef RAX_H @@ -134,6 +113,7 @@ typedef struct rax { raxNode *head; uint64_t numele; uint64_t numnodes; + void *metadata[]; } rax; /* Stack data structure used by raxLowWalk() in order to, optionally, return @@ -185,17 +165,18 @@ typedef struct raxIterator { raxNodeCallback node_cb; /* Optional node callback. Normally set to NULL. */ } raxIterator; -/* A special pointer returned for not found items. */ -extern void *raxNotFound; - /* Exported API. */ rax *raxNew(void); +rax *raxNewWithMetadata(int metaSize); int raxInsert(rax *rax, unsigned char *s, size_t len, void *data, void **old); int raxTryInsert(rax *rax, unsigned char *s, size_t len, void *data, void **old); int raxRemove(rax *rax, unsigned char *s, size_t len, void **old); -void *raxFind(rax *rax, unsigned char *s, size_t len); +int raxFind(rax *rax, unsigned char *s, size_t len, void **value); void raxFree(rax *rax); void raxFreeWithCallback(rax *rax, void (*free_callback)(void*)); +void raxFreeWithCbAndContext(rax *rax, + void (*free_callback)(void *item, void *ctx), + void *ctx); void raxStart(raxIterator *it, rax *rt); int raxSeek(raxIterator *it, const char *op, unsigned char *ele, size_t len); int raxNext(raxIterator *it); diff --git a/src/rax_malloc.h b/src/rax_malloc.h index 9295985c653..a45bc98db2b 100644 --- a/src/rax_malloc.h +++ b/src/rax_malloc.h @@ -1,31 +1,10 @@ /* Rax -- A radix tree implementation. * - * Copyright (c) 2017, Salvatore Sanfilippo + * Copyright (c) 2017-Present, Redis Ltd. * All rights reserved. * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. 
- * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Redis nor the names of its contributors may be used - * to endorse or promote products derived from this software without - * specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. + * Licensed under your choice of the Redis Source Available License 2.0 + * (RSALv2) or the Server Side Public License v1 (SSPLv1). */ /* Allocator selection. diff --git a/src/rdb.c b/src/rdb.c index cfc92e815bc..c5c0b04f66b 100644 --- a/src/rdb.c +++ b/src/rdb.c @@ -1,30 +1,9 @@ /* - * Copyright (c) 2009-2012, Salvatore Sanfilippo + * Copyright (c) 2009-Present, Redis Ltd. * All rights reserved. * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. 
- * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Redis nor the names of its contributors may be used - * to endorse or promote products derived from this software without - * specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. + * Licensed under your choice of the Redis Source Available License 2.0 + * (RSALv2) or the Server Side Public License v1 (SSPLv1). */ #include "server.h" @@ -137,7 +116,7 @@ time_t rdbLoadTime(rio *rdb) { return (time_t)t32; } -int rdbSaveMillisecondTime(rio *rdb, long long t) { +ssize_t rdbSaveMillisecondTime(rio *rdb, long long t) { int64_t t64 = (int64_t) t; memrev64ifbe(&t64); /* Store in little endian. */ return rdbWriteRaw(rdb,&t64,8); @@ -289,8 +268,9 @@ int rdbEncodeInteger(long long value, unsigned char *enc) { * The returned value changes according to the flags, see * rdbGenericLoadStringObject() for more info. 
*/ void *rdbLoadIntegerObject(rio *rdb, int enctype, int flags, size_t *lenptr) { - int plain = flags & RDB_LOAD_PLAIN; - int sds = flags & RDB_LOAD_SDS; + int plainFlag = flags & RDB_LOAD_PLAIN; + int sdsFlag = flags & RDB_LOAD_SDS; + int hfldFlag = flags & (RDB_LOAD_HFLD|RDB_LOAD_HFLD_TTL); int encode = flags & RDB_LOAD_ENC; unsigned char enc[4]; long long val; @@ -316,11 +296,17 @@ void *rdbLoadIntegerObject(rio *rdb, int enctype, int flags, size_t *lenptr) { rdbReportCorruptRDB("Unknown RDB integer encoding type %d",enctype); return NULL; /* Never reached. */ } - if (plain || sds) { + if (plainFlag || sdsFlag || hfldFlag) { char buf[LONG_STR_SIZE], *p; int len = ll2string(buf,sizeof(buf),val); if (lenptr) *lenptr = len; - p = plain ? zmalloc(len) : sdsnewlen(SDS_NOINIT,len); + if (plainFlag) { + p = zmalloc(len); + } else if (sdsFlag) { + p = sdsnewlen(SDS_NOINIT,len); + } else { /* hfldFlag */ + p = hfieldNew(NULL, len, (flags&RDB_LOAD_HFLD) ? 0 : 1); + } memcpy(p,buf,len); return p; } else if (encode) { @@ -389,8 +375,11 @@ ssize_t rdbSaveLzfStringObject(rio *rdb, unsigned char *s, size_t len) { * changes according to 'flags'. For more info check the * rdbGenericLoadStringObject() function. */ void *rdbLoadLzfStringObject(rio *rdb, int flags, size_t *lenptr) { - int plain = flags & RDB_LOAD_PLAIN; - int sds = flags & RDB_LOAD_SDS; + int plainFlag = flags & RDB_LOAD_PLAIN; + int sdsFlag = flags & RDB_LOAD_SDS; + int hfldFlag = flags & (RDB_LOAD_HFLD | RDB_LOAD_HFLD_TTL); + int robjFlag = (!(plainFlag || sdsFlag || hfldFlag)); /* not plain/sds/hfld */ + uint64_t len, clen; unsigned char *c = NULL; char *val = NULL; @@ -403,11 +392,14 @@ void *rdbLoadLzfStringObject(rio *rdb, int flags, size_t *lenptr) { } /* Allocate our target according to the uncompressed size. 
*/ - if (plain) { + if (plainFlag) { val = ztrymalloc(len); - } else { + } else if (sdsFlag || robjFlag) { val = sdstrynewlen(SDS_NOINIT,len); + } else { /* hfldFlag */ + val = hfieldTryNew(NULL, len, (flags&RDB_LOAD_HFLD) ? 0 : 1); } + if (!val) { serverLog(isRestoreContext()? LL_VERBOSE: LL_WARNING, "rdbLoadLzfStringObject failed allocating %llu bytes", (unsigned long long)len); goto err; @@ -423,17 +415,17 @@ void *rdbLoadLzfStringObject(rio *rdb, int flags, size_t *lenptr) { } zfree(c); - if (plain || sds) { - return val; - } else { - return createObject(OBJ_STRING,val); - } + return (robjFlag) ? createObject(OBJ_STRING,val) : (void *) val; + err: zfree(c); - if (plain) + if (plainFlag) { zfree(val); - else + } else if (sdsFlag || robjFlag) { sdsfree(val); + } else { /* hfldFlag*/ + hfieldFree(val); + } return NULL; } @@ -512,12 +504,18 @@ ssize_t rdbSaveStringObject(rio *rdb, robj *obj) { * RDB_LOAD_PLAIN: Return a plain string allocated with zmalloc() * instead of a Redis object with an sds in it. * RDB_LOAD_SDS: Return an SDS string instead of a Redis object. + * RDB_LOAD_HFLD: Return a hash field object (mstr) + * RDB_LOAD_HFLD_TTL: Return a hash field with TTL metadata reserved * * On I/O error NULL is returned. */ void *rdbGenericLoadStringObject(rio *rdb, int flags, size_t *lenptr) { - int plain = flags & RDB_LOAD_PLAIN; - int sds = flags & RDB_LOAD_SDS; + void *buf; + int plainFlag = flags & RDB_LOAD_PLAIN; + int sdsFlag = flags & RDB_LOAD_SDS; + int hfldFlag = flags & (RDB_LOAD_HFLD|RDB_LOAD_HFLD_TTL); + int robjFlag = (!(plainFlag || sdsFlag || hfldFlag)); /* not plain/sds/hfld */ + int isencoded; unsigned long long len; @@ -538,22 +536,8 @@ void *rdbGenericLoadStringObject(rio *rdb, int flags, size_t *lenptr) { } } - if (plain || sds) { - void *buf = plain ? ztrymalloc(len) : sdstrynewlen(SDS_NOINIT,len); - if (!buf) { - serverLog(isRestoreContext()? 
LL_VERBOSE: LL_WARNING, "rdbGenericLoadStringObject failed allocating %llu bytes", len); - return NULL; - } - if (lenptr) *lenptr = len; - if (len && rioRead(rdb,buf,len) == 0) { - if (plain) - zfree(buf); - else - sdsfree(buf); - return NULL; - } - return buf; - } else { + /* return robj */ + if (robjFlag) { robj *o = tryCreateStringObject(SDS_NOINIT,len); if (!o) { serverLog(isRestoreContext()? LL_VERBOSE: LL_WARNING, "rdbGenericLoadStringObject failed allocating %llu bytes", len); @@ -565,6 +549,32 @@ void *rdbGenericLoadStringObject(rio *rdb, int flags, size_t *lenptr) { } return o; } + + /* plain/sds/hfld */ + if (plainFlag) { + buf = ztrymalloc(len); + } else if (sdsFlag) { + buf = sdstrynewlen(SDS_NOINIT,len); + } else { /* hfldFlag */ + buf = hfieldTryNew(NULL, len, (flags&RDB_LOAD_HFLD) ? 0 : 1); + } + if (!buf) { + serverLog(isRestoreContext()? LL_VERBOSE: LL_WARNING, "rdbGenericLoadStringObject failed allocating %llu bytes", len); + return NULL; + } + + if (lenptr) *lenptr = len; + if (len && rioRead(rdb,buf,len) == 0) { + if (plainFlag) + zfree(buf); + else if (sdsFlag) { + sdsfree(buf); + } else { /* hfldFlag */ + hfieldFree(buf); + } + return NULL; + } + return buf; } robj *rdbLoadStringObject(rio *rdb) { @@ -583,7 +593,7 @@ robj *rdbLoadEncodedStringObject(rio *rdb) { * 254: + inf * 255: - inf */ -int rdbSaveDoubleValue(rio *rdb, double val) { +ssize_t rdbSaveDoubleValue(rio *rdb, double val) { unsigned char buf[128]; int len; @@ -686,9 +696,14 @@ int rdbSaveObjectType(rio *rdb, robj *o) { case OBJ_HASH: if (o->encoding == OBJ_ENCODING_LISTPACK) return rdbSaveType(rdb,RDB_TYPE_HASH_LISTPACK); - else if (o->encoding == OBJ_ENCODING_HT) - return rdbSaveType(rdb,RDB_TYPE_HASH); - else + else if (o->encoding == OBJ_ENCODING_LISTPACK_EX) + return rdbSaveType(rdb,RDB_TYPE_HASH_LISTPACK_EX); + else if (o->encoding == OBJ_ENCODING_HT) { + if (hashTypeGetMinExpire(o, 0) == EB_EXPIRE_TIME_INVALID) + return rdbSaveType(rdb,RDB_TYPE_HASH); + else + return 
rdbSaveType(rdb,RDB_TYPE_HASH_METADATA); + } else serverPanic("Unknown hash encoding"); case OBJ_STREAM: return rdbSaveType(rdb,RDB_TYPE_STREAM_LISTPACKS_3); @@ -929,32 +944,58 @@ ssize_t rdbSaveObject(rio *rdb, robj *o, robj *key, int dbid) { } } else if (o->type == OBJ_HASH) { /* Save a hash value */ - if (o->encoding == OBJ_ENCODING_LISTPACK) { - size_t l = lpBytes((unsigned char*)o->ptr); + if ((o->encoding == OBJ_ENCODING_LISTPACK) || + (o->encoding == OBJ_ENCODING_LISTPACK_EX)) + { + unsigned char *lp_ptr = hashTypeListpackGetLp(o); + size_t l = lpBytes(lp_ptr); - if ((n = rdbSaveRawString(rdb,o->ptr,l)) == -1) return -1; + if ((n = rdbSaveRawString(rdb,lp_ptr,l)) == -1) return -1; nwritten += n; } else if (o->encoding == OBJ_ENCODING_HT) { dictIterator *di = dictGetIterator(o->ptr); dictEntry *de; - + /* Determine the hash layout to use based on the presence of at least + * one field with a valid TTL. If such a field exists, employ the + * RDB_TYPE_HASH_METADATA layout, including tuples of [ttl][field][value]. + * Otherwise, use the standard RDB_TYPE_HASH layout containing only + * the tuples [field][value]. 
*/ + int with_ttl = (hashTypeGetMinExpire(o, 0) != EB_EXPIRE_TIME_INVALID); + + /* save number of fields in hash */ if ((n = rdbSaveLen(rdb,dictSize((dict*)o->ptr))) == -1) { dictReleaseIterator(di); return -1; } nwritten += n; + /* save all hash fields */ while((de = dictNext(di)) != NULL) { - sds field = dictGetKey(de); + hfield field = dictGetKey(de); sds value = dictGetVal(de); + /* save the TTL */ + if (with_ttl) { + uint64_t ttl = hfieldGetExpireTime(field); + /* 0 is used to indicate no TTL is set for this field */ + if (ttl == EB_EXPIRE_TIME_INVALID) ttl = 0; + if ((n = rdbSaveLen(rdb, ttl)) == -1) { + dictReleaseIterator(di); + return -1; + } + nwritten += n; + } + + /* save the key */ if ((n = rdbSaveRawString(rdb,(unsigned char*)field, - sdslen(field))) == -1) + hfieldlen(field))) == -1) { dictReleaseIterator(di); return -1; } nwritten += n; + + /* save the value */ if ((n = rdbSaveRawString(rdb,(unsigned char*)value, sdslen(value))) == -1) { @@ -1298,17 +1339,16 @@ ssize_t rdbSaveFunctions(rio *rdb) { } ssize_t rdbSaveDb(rio *rdb, int dbid, int rdbflags, long *key_counter) { - dictIterator *di; dictEntry *de; ssize_t written = 0; ssize_t res; + kvstoreIterator *kvs_it = NULL; static long long info_updated_time = 0; char *pname = (rdbflags & RDBFLAGS_AOF_PREAMBLE) ? "AOF rewrite" : "RDB"; redisDb *db = server.db + dbid; - dict *d = db->dict; - if (dictSize(d) == 0) return 0; - di = dictGetSafeIterator(d); + unsigned long long int db_size = kvstoreSize(db->keys); + if (db_size == 0) return 0; /* Write the SELECT DB opcode */ if ((res = rdbSaveType(rdb,RDB_OPCODE_SELECTDB)) < 0) goto werr; @@ -1317,9 +1357,7 @@ ssize_t rdbSaveDb(rio *rdb, int dbid, int rdbflags, long *key_counter) { written += res; /* Write the RESIZE DB opcode. 
*/ - uint64_t db_size, expires_size; - db_size = dictSize(db->dict); - expires_size = dictSize(db->expires); + unsigned long long expires_size = kvstoreSize(db->expires); if ((res = rdbSaveType(rdb,RDB_OPCODE_RESIZEDB)) < 0) goto werr; written += res; if ((res = rdbSaveLen(rdb,db_size)) < 0) goto werr; @@ -1327,8 +1365,23 @@ ssize_t rdbSaveDb(rio *rdb, int dbid, int rdbflags, long *key_counter) { if ((res = rdbSaveLen(rdb,expires_size)) < 0) goto werr; written += res; + kvs_it = kvstoreIteratorInit(db->keys); + int last_slot = -1; /* Iterate this DB writing every entry */ - while((de = dictNext(di)) != NULL) { + while ((de = kvstoreIteratorNext(kvs_it)) != NULL) { + int curr_slot = kvstoreIteratorGetCurrentDictIndex(kvs_it); + /* Save slot info. */ + if (server.cluster_enabled && curr_slot != last_slot) { + if ((res = rdbSaveType(rdb, RDB_OPCODE_SLOT_INFO)) < 0) goto werr; + written += res; + if ((res = rdbSaveLen(rdb, curr_slot)) < 0) goto werr; + written += res; + if ((res = rdbSaveLen(rdb, kvstoreDictSize(db->keys, curr_slot))) < 0) goto werr; + written += res; + if ((res = rdbSaveLen(rdb, kvstoreDictSize(db->expires, curr_slot))) < 0) goto werr; + written += res; + last_slot = curr_slot; + } sds keystr = dictGetKey(de); robj key, *o = dictGetVal(de); long long expire; @@ -1356,12 +1409,11 @@ ssize_t rdbSaveDb(rio *rdb, int dbid, int rdbflags, long *key_counter) { } } } - - dictReleaseIterator(di); + kvstoreIteratorRelease(kvs_it); return written; werr: - dictReleaseIterator(di); + if (kvs_it) kvstoreIteratorRelease(kvs_it); return -1; } @@ -1413,7 +1465,8 @@ int rdbSaveRio(int req, rio *rdb, int *error, int rdbflags, rdbSaveInfo *rsi) { return C_ERR; } -/* This is just a wrapper to rdbSaveRio() that additionally adds a prefix +/* This helper function is only used for diskless replication. + * This is just a wrapper to rdbSaveRio() that additionally adds a prefix * and a suffix to the generated RDB dump. 
The prefix is: * * $EOF:<40 bytes unguessable hex string>\r\n @@ -1430,7 +1483,7 @@ int rdbSaveRioWithEOFMark(int req, rio *rdb, int *error, rdbSaveInfo *rsi) { if (rioWrite(rdb,"$EOF:",5) == 0) goto werr; if (rioWrite(rdb,eofmark,RDB_EOF_MARK_SIZE) == 0) goto werr; if (rioWrite(rdb,"\r\n",2) == 0) goto werr; - if (rdbSaveRio(req,rdb,error,RDBFLAGS_NONE,rsi) == C_ERR) goto werr; + if (rdbSaveRio(req,rdb,error,RDBFLAGS_REPLICATION,rsi) == C_ERR) goto werr; if (rioWrite(rdb,eofmark,RDB_EOF_MARK_SIZE) == 0) goto werr; stopSaving(1); return C_OK; @@ -1517,7 +1570,7 @@ int rdbSave(int req, char *filename, rdbSaveInfo *rsi, int rdbflags) { char tmpfile[256]; char cwd[MAXPATHLEN]; /* Current working dir path for error messages. */ - startSaving(RDBFLAGS_NONE); + startSaving(rdbflags); snprintf(tmpfile,256,"temp-%d.rdb", (int) getpid()); if (rdbSaveInternal(req,tmpfile,rsi,rdbflags) != C_OK) { @@ -1762,19 +1815,20 @@ static int _listZiplistEntryConvertAndValidate(unsigned char *p, unsigned int he /* callback for to check the listpack doesn't have duplicate records */ static int _lpEntryValidation(unsigned char *p, unsigned int head_count, void *userdata) { struct { - int pairs; + int tuple_len; long count; dict *fields; + long long last_expireat; } *data = userdata; if (data->fields == NULL) { data->fields = dictCreate(&hashDictType); - dictExpand(data->fields, data->pairs ? head_count/2 : head_count); + dictExpand(data->fields, head_count/data->tuple_len); } /* If we're checking pairs, then even records are field names. Otherwise * we're checking all elements. Add to dict and check that's not a dup */ - if (!data->pairs || ((data->count) & 1) == 0) { + if (data->count % data->tuple_len == 0) { unsigned char *str; int64_t slen; unsigned char buf[LP_INTBUF_SIZE]; @@ -1788,6 +1842,19 @@ static int _lpEntryValidation(unsigned char *p, unsigned int head_count, void *u } } + /* Validate TTL field, only for listpackex. 
*/ + if (data->count % data->tuple_len == 2) { + long long expire_at; + /* Must be an integer. */ + if (!lpGetIntegerValue(p, &expire_at)) return 0; + /* Must be less than EB_EXPIRE_TIME_MAX. */ + if (expire_at < 0 || (unsigned long long)expire_at > EB_EXPIRE_TIME_MAX) return 0; + /* TTL fields are ordered. If the current field has TTL, the previous field must + * also have one, and the current TTL must be greater than the previous one. */ + if (expire_at != 0 && (data->last_expireat == 0 || expire_at < data->last_expireat)) return 0; + data->last_expireat = expire_at; + } + (data->count)++; return 1; } @@ -1795,23 +1862,25 @@ static int _lpEntryValidation(unsigned char *p, unsigned int head_count, void *u /* Validate the integrity of the listpack structure. * when `deep` is 0, only the integrity of the header is validated. * when `deep` is 1, we scan all the entries one by one. - * when `pairs` is 0, all elements need to be unique (it's a set) - * when `pairs` is 1, odd elements need to be unique (it's a key-value map) */ -int lpValidateIntegrityAndDups(unsigned char *lp, size_t size, int deep, int pairs) { + * tuple_len indicates what is a logical entry tuple size. + * Whether tuple is of size 1 (set), 2 (field-value) or 3 (field-value[-ttl]), + * first element in the tuple must be unique */ +int lpValidateIntegrityAndDups(unsigned char *lp, size_t size, int deep, int tuple_len) { if (!deep) return lpValidateIntegrity(lp, size, 0, NULL, NULL); /* Keep track of the field names to locate duplicate ones */ struct { - int pairs; + int tuple_len; long count; dict *fields; /* Initialisation at the first callback. */ - } data = {pairs, 0, NULL}; + long long last_expireat; /* Last field's expiry time to ensure order in TTL fields. */ + } data = {tuple_len, 0, NULL, -1}; int ret = lpValidateIntegrity(lp, size, 1, _lpEntryValidation, &data); - /* make sure we have an even number of records. 
*/ - if (pairs && data.count & 1) + /* the number of records should be a multiple of the tuple length */ + if (data.count % tuple_len != 0) ret = 0; if (data.fields) dictRelease(data.fields); @@ -1820,9 +1889,16 @@ int lpValidateIntegrityAndDups(unsigned char *lp, size_t size, int deep, int pai /* Load a Redis object of the specified type from the specified file. * On success a newly allocated object is returned, otherwise NULL. - * When the function returns NULL and if 'error' is not NULL, the - * integer pointed by 'error' is set to the type of error that occurred */ -robj *rdbLoadObject(int rdbtype, rio *rdb, sds key, int dbid, int *error) { + * + * error - When the function returns NULL and if 'error' is not NULL, the + * integer pointed by 'error' is set to the type of error that occurred + * minExpiredField - If loading a hash with expiration on fields, then this value + * will be set to the minimum expire time found in the hash fields. If there are + * no fields with expiration or it is not a hash, then it will set be to + * EB_EXPIRE_TIME_INVALID. 
+ */ +robj *rdbLoadObject(int rdbtype, rio *rdb, sds key, int dbid, int *error) +{ robj *o = NULL, *ele, *dec; uint64_t len; unsigned int i; @@ -1850,9 +1926,7 @@ robj *rdbLoadObject(int rdbtype, rio *rdb, sds key, int dbid, int *error) { if ((len = rdbLoadLen(rdb,NULL)) == RDB_LENERR) return NULL; if (len == 0) goto emptykey; - o = createQuicklistObject(); - quicklistSetOptions(o->ptr, server.list_max_listpack_size, - server.list_compress_depth); + o = createQuicklistObject(server.list_max_listpack_size, server.list_compress_depth); /* Load every single element of the list */ while(len--) { @@ -1867,7 +1941,7 @@ robj *rdbLoadObject(int rdbtype, rio *rdb, sds key, int dbid, int *error) { decrRefCount(ele); } - listTypeTryConversion(o,LIST_CONV_AUTO,NULL,NULL); + listTypeTryConversion(o, LIST_CONV_AUTO, NULL, NULL); } else if (rdbtype == RDB_TYPE_SET) { /* Read Set value */ if ((len = rdbLoadLen(rdb,NULL)) == RDB_LENERR) return NULL; @@ -1880,7 +1954,7 @@ robj *rdbLoadObject(int rdbtype, rio *rdb, sds key, int dbid, int *error) { o = createSetObject(); /* It's faster to expand the dict to the right size asap in order * to avoid rehashing */ - if (len > DICT_HT_INITIAL_SIZE && dictTryExpand(o->ptr,len) != DICT_OK) { + if (len > DICT_HT_INITIAL_SIZE && dictTryExpand(o->ptr, len) != DICT_OK) { rdbReportCorruptRDB("OOM in dictTryExpand %llu", (unsigned long long)len); decrRefCount(o); return NULL; @@ -1907,7 +1981,7 @@ robj *rdbLoadObject(int rdbtype, rio *rdb, sds key, int dbid, int *error) { /* Fetch integer value from element. 
*/ if (isSdsRepresentableAsLongLong(sdsele,&llval) == C_OK) { uint8_t success; - o->ptr = intsetAdd(o->ptr,llval,&success); + o->ptr = intsetAdd(o->ptr, llval, &success); if (!success) { rdbReportCorruptRDB("Duplicate set members detected"); decrRefCount(o); @@ -1957,7 +2031,7 @@ robj *rdbLoadObject(int rdbtype, rio *rdb, sds key, int dbid, int *error) { /* This will also be called when the set was just converted * to a regular hash table encoded set. */ if (o->encoding == OBJ_ENCODING_HT) { - if (dictAdd((dict*)o->ptr,sdsele,NULL) != DICT_OK) { + if (dictAdd((dict*)o->ptr, sdsele, NULL) != DICT_OK) { rdbReportCorruptRDB("Duplicate set members detected"); decrRefCount(o); sdsfree(sdsele); @@ -2035,12 +2109,13 @@ robj *rdbLoadObject(int rdbtype, rio *rdb, sds key, int dbid, int *error) { maxelelen <= server.zset_max_listpack_value && lpSafeToAdd(NULL, totelelen)) { - zsetConvert(o,OBJ_ENCODING_LISTPACK); + zsetConvert(o, OBJ_ENCODING_LISTPACK); } } else if (rdbtype == RDB_TYPE_HASH) { uint64_t len; int ret; - sds field, value; + sds value; + hfield field; dict *dupSearchDict = NULL; len = rdbLoadLen(rdb, NULL); @@ -2051,7 +2126,7 @@ robj *rdbLoadObject(int rdbtype, rio *rdb, sds key, int dbid, int *error) { /* Too many entries? Use a hash table right from the start. */ if (len > server.hash_max_listpack_entries) - hashTypeConvert(o, OBJ_ENCODING_HT); + hashTypeConvert(o, OBJ_ENCODING_HT, NULL); else if (deep_integrity_validation) { /* In this mode, we need to guarantee that the server won't crash * later when the ziplist is converted to a dict. 
@@ -2060,48 +2135,50 @@ robj *rdbLoadObject(int rdbtype, rio *rdb, sds key, int dbid, int *error) { dupSearchDict = dictCreate(&hashDictType); } - - /* Load every field and value into the ziplist */ + /* Load every field and value into the listpack */ while (o->encoding == OBJ_ENCODING_LISTPACK && len > 0) { len--; /* Load raw strings */ - if ((field = rdbGenericLoadStringObject(rdb,RDB_LOAD_SDS,NULL)) == NULL) { + if ((field = rdbGenericLoadStringObject(rdb,RDB_LOAD_HFLD,NULL)) == NULL) { decrRefCount(o); if (dupSearchDict) dictRelease(dupSearchDict); return NULL; } if ((value = rdbGenericLoadStringObject(rdb,RDB_LOAD_SDS,NULL)) == NULL) { - sdsfree(field); + hfieldFree(field); decrRefCount(o); if (dupSearchDict) dictRelease(dupSearchDict); return NULL; } if (dupSearchDict) { - sds field_dup = sdsdup(field); + sds field_dup = sdsnewlen(field, hfieldlen(field)); + if (dictAdd(dupSearchDict, field_dup, NULL) != DICT_OK) { rdbReportCorruptRDB("Hash with dup elements"); dictRelease(dupSearchDict); decrRefCount(o); sdsfree(field_dup); - sdsfree(field); + hfieldFree(field); sdsfree(value); return NULL; } } /* Convert to hash table if size threshold is exceeded */ - if (sdslen(field) > server.hash_max_listpack_value || + if (hfieldlen(field) > server.hash_max_listpack_value || sdslen(value) > server.hash_max_listpack_value || - !lpSafeToAdd(o->ptr, sdslen(field)+sdslen(value))) + !lpSafeToAdd(o->ptr, hfieldlen(field) + sdslen(value))) { - hashTypeConvert(o, OBJ_ENCODING_HT); + hashTypeConvert(o, OBJ_ENCODING_HT, NULL); + dictUseStoredKeyApi((dict *)o->ptr, 1); ret = dictAdd((dict*)o->ptr, field, value); + dictUseStoredKeyApi((dict *)o->ptr, 0); if (ret == DICT_ERR) { rdbReportCorruptRDB("Duplicate hash fields detected"); if (dupSearchDict) dictRelease(dupSearchDict); sdsfree(value); - sdsfree(field); + hfieldFree(field); decrRefCount(o); return NULL; } @@ -2109,10 +2186,10 @@ robj *rdbLoadObject(int rdbtype, rio *rdb, sds key, int dbid, int *error) { } /* Add pair to 
listpack */ - o->ptr = lpAppend(o->ptr, (unsigned char*)field, sdslen(field)); + o->ptr = lpAppend(o->ptr, (unsigned char*)field, hfieldlen(field)); o->ptr = lpAppend(o->ptr, (unsigned char*)value, sdslen(value)); - sdsfree(field); + hfieldFree(field); sdsfree(value); } @@ -2124,7 +2201,7 @@ robj *rdbLoadObject(int rdbtype, rio *rdb, sds key, int dbid, int *error) { } if (o->encoding == OBJ_ENCODING_HT && len > DICT_HT_INITIAL_SIZE) { - if (dictTryExpand(o->ptr,len) != DICT_OK) { + if (dictTryExpand(o->ptr, len) != DICT_OK) { rdbReportCorruptRDB("OOM in dictTryExpand %llu", (unsigned long long)len); decrRefCount(o); return NULL; @@ -2135,22 +2212,25 @@ robj *rdbLoadObject(int rdbtype, rio *rdb, sds key, int dbid, int *error) { while (o->encoding == OBJ_ENCODING_HT && len > 0) { len--; /* Load encoded strings */ - if ((field = rdbGenericLoadStringObject(rdb,RDB_LOAD_SDS,NULL)) == NULL) { + if ((field = rdbGenericLoadStringObject(rdb,RDB_LOAD_HFLD,NULL)) == NULL) { decrRefCount(o); return NULL; } if ((value = rdbGenericLoadStringObject(rdb,RDB_LOAD_SDS,NULL)) == NULL) { - sdsfree(field); + hfieldFree(field); decrRefCount(o); return NULL; } /* Add pair to hash table */ - ret = dictAdd((dict*)o->ptr, field, value); + dict *d = o->ptr; + dictUseStoredKeyApi(d, 1); + ret = dictAdd(d, field, value); + dictUseStoredKeyApi(d, 0); if (ret == DICT_ERR) { rdbReportCorruptRDB("Duplicate hash fields detected"); sdsfree(value); - sdsfree(field); + hfieldFree(field); decrRefCount(o); return NULL; } @@ -2158,13 +2238,146 @@ robj *rdbLoadObject(int rdbtype, rio *rdb, sds key, int dbid, int *error) { /* All pairs should be read by now */ serverAssert(len == 0); + } else if (rdbtype == RDB_TYPE_HASH_METADATA) { + sds value; + hfield field; + uint64_t expireAt; + dict *dupSearchDict = NULL; + + len = rdbLoadLen(rdb, NULL); + if (len == RDB_LENERR) return NULL; + if (len == 0) goto emptykey; + /* TODO: create listpackEx or HT directly*/ + o = createHashObject(); + /* Too many entries? 
Use a hash table right from the start. */ + if (len > server.hash_max_listpack_entries) { + hashTypeConvert(o, OBJ_ENCODING_HT, NULL); + dictTypeAddMeta((dict**)&o->ptr, &mstrHashDictTypeWithHFE); + initDictExpireMetadata(key, o); + } else { + hashTypeConvert(o, OBJ_ENCODING_LISTPACK_EX, NULL); + if (deep_integrity_validation) { + /* In this mode, we need to guarantee that the server won't crash + * later when the listpack is converted to a dict. + * Create a set (dict with no values) for dup search. + * We can dismiss it as soon as we convert the listpack to a hash. */ + dupSearchDict = dictCreate(&hashDictType); + } + } + + while (len > 0) { + len--; + + /* read the TTL */ + if (rdbLoadLenByRef(rdb, NULL, &expireAt) == -1) { + serverLog(LL_WARNING, "failed reading hash TTL"); + decrRefCount(o); + if (dupSearchDict != NULL) dictRelease(dupSearchDict); + return NULL; + } + if (expireAt > EB_EXPIRE_TIME_MAX) { + rdbReportCorruptRDB("invalid expireAt time: %llu", (unsigned long long)expireAt); + decrRefCount(o); + return NULL; + } + + /* if needed create field with TTL metadata */ + if (expireAt !=0) + field = rdbGenericLoadStringObject(rdb, RDB_LOAD_HFLD_TTL, NULL); + else + field = rdbGenericLoadStringObject(rdb, RDB_LOAD_HFLD, NULL); + + if (field == NULL) { + serverLog(LL_WARNING, "failed reading hash field"); + decrRefCount(o); + if (dupSearchDict != NULL) dictRelease(dupSearchDict); + return NULL; + } + + /* read the value */ + if ((value = rdbGenericLoadStringObject(rdb,RDB_LOAD_SDS,NULL)) == NULL) { + serverLog(LL_WARNING, "failed reading hash value"); + decrRefCount(o); + if (dupSearchDict != NULL) dictRelease(dupSearchDict); + hfieldFree(field); + return NULL; + } + + /* store the values read - either to listpack or dict */ + if (o->encoding == OBJ_ENCODING_LISTPACK_EX) { + /* integrity - check for key duplication (if required) */ + if (dupSearchDict) { + sds field_dup = sdsnewlen(field, hfieldlen(field)); + + if (dictAdd(dupSearchDict, field_dup, NULL) != 
DICT_OK) { + rdbReportCorruptRDB("Hash with dup elements"); + dictRelease(dupSearchDict); + decrRefCount(o); + sdsfree(field_dup); + sdsfree(value); + hfieldFree(field); + return NULL; + } + } + + /* check if the values can be saved to listpack (or should convert to dict encoding) */ + if (hfieldlen(field) > server.hash_max_listpack_value || + sdslen(value) > server.hash_max_listpack_value || + !lpSafeToAdd(((listpackEx*)o->ptr)->lp, hfieldlen(field) + sdslen(value) + lpEntrySizeInteger(expireAt))) + { + /* convert to hash */ + hashTypeConvert(o, OBJ_ENCODING_HT, NULL); + + if (len > DICT_HT_INITIAL_SIZE) { /* TODO: this is NOT the original len, but this is also the case for simple hash, is this a bug? */ + if (dictTryExpand(o->ptr, len) != DICT_OK) { + rdbReportCorruptRDB("OOM in dictTryExpand %llu", (unsigned long long)len); + decrRefCount(o); + if (dupSearchDict != NULL) dictRelease(dupSearchDict); + sdsfree(value); + hfieldFree(field); + return NULL; + } + } + + /* don't add the values to the new hash: the next if will catch and the values will be added there */ + } else { + listpackExAddNew(o, field, hfieldlen(field), + value, sdslen(value), expireAt); + hfieldFree(field); + sdsfree(value); + } + } + + if (o->encoding == OBJ_ENCODING_HT) { + /* Add pair to hash table */ + dict *d = o->ptr; + dictUseStoredKeyApi(d, 1); + int ret = dictAdd(d, field, value); + dictUseStoredKeyApi(d, 0); + + /* Attach expiry to the hash field and register in hash private HFE DS */ + if ((ret != DICT_ERR) && expireAt) { + dictExpireMetadata *m = (dictExpireMetadata *) dictMetadata(d); + ret = ebAdd(&m->hfe, &hashFieldExpireBucketsType, field, expireAt); + } + + if (ret == DICT_ERR) { + rdbReportCorruptRDB("Duplicate hash fields detected"); + sdsfree(value); + hfieldFree(field); + decrRefCount(o); + return NULL; + } + } + } + + if (dupSearchDict != NULL) dictRelease(dupSearchDict); + } else if (rdbtype == RDB_TYPE_LIST_QUICKLIST || rdbtype == RDB_TYPE_LIST_QUICKLIST_2) { if ((len = 
rdbLoadLen(rdb,NULL)) == RDB_LENERR) return NULL; if (len == 0) goto emptykey; - o = createQuicklistObject(); - quicklistSetOptions(o->ptr, server.list_max_listpack_size, - server.list_compress_depth); + o = createQuicklistObject(server.list_max_listpack_size, server.list_compress_depth); uint64_t container = QUICKLIST_NODE_CONTAINER_PACKED; while (len--) { unsigned char *lp; @@ -2234,7 +2447,7 @@ robj *rdbLoadObject(int rdbtype, rio *rdb, sds key, int dbid, int *error) { goto emptykey; } - listTypeTryConversion(o,LIST_CONV_AUTO,NULL,NULL); + listTypeTryConversion(o, LIST_CONV_AUTO, NULL, NULL); } else if (rdbtype == RDB_TYPE_HASH_ZIPMAP || rdbtype == RDB_TYPE_LIST_ZIPLIST || rdbtype == RDB_TYPE_SET_INTSET || @@ -2242,14 +2455,15 @@ robj *rdbLoadObject(int rdbtype, rio *rdb, sds key, int dbid, int *error) { rdbtype == RDB_TYPE_ZSET_ZIPLIST || rdbtype == RDB_TYPE_ZSET_LISTPACK || rdbtype == RDB_TYPE_HASH_ZIPLIST || - rdbtype == RDB_TYPE_HASH_LISTPACK) + rdbtype == RDB_TYPE_HASH_LISTPACK || + rdbtype == RDB_TYPE_HASH_LISTPACK_EX) { size_t encoded_len; unsigned char *encoded = rdbGenericLoadStringObject(rdb,RDB_LOAD_PLAIN,&encoded_len); if (encoded == NULL) return NULL; - o = createObject(OBJ_STRING,encoded); /* Obj type fixed below. */ + o = createObject(OBJ_STRING, encoded); /* Obj type fixed below. 
*/ /* Fix the object encoding, and make sure to convert the encoded * data type into the base type if accordingly to the current @@ -2305,14 +2519,14 @@ robj *rdbLoadObject(int rdbtype, rio *rdb, sds key, int dbid, int *error) { o->type = OBJ_HASH; o->encoding = OBJ_ENCODING_LISTPACK; - if (hashTypeLength(o) > server.hash_max_listpack_entries || + if (hashTypeLength(o, 0) > server.hash_max_listpack_entries || maxlen > server.hash_max_listpack_value) { - hashTypeConvert(o, OBJ_ENCODING_HT); + hashTypeConvert(o, OBJ_ENCODING_HT, NULL); } } break; - case RDB_TYPE_LIST_ZIPLIST: + case RDB_TYPE_LIST_ZIPLIST: { quicklist *ql = quicklistNew(server.list_max_listpack_size, server.list_compress_depth); @@ -2354,11 +2568,11 @@ robj *rdbLoadObject(int rdbtype, rio *rdb, sds key, int dbid, int *error) { o->type = OBJ_SET; o->encoding = OBJ_ENCODING_INTSET; if (intsetLen(o->ptr) > server.set_max_intset_entries) - setTypeConvert(o,OBJ_ENCODING_HT); + setTypeConvert(o, OBJ_ENCODING_HT); break; case RDB_TYPE_SET_LISTPACK: if (deep_integrity_validation) server.stat_dump_payload_sanitizations++; - if (!lpValidateIntegrityAndDups(encoded, encoded_len, deep_integrity_validation, 0)) { + if (!lpValidateIntegrityAndDups(encoded, encoded_len, deep_integrity_validation, 1)) { rdbReportCorruptRDB("Set listpack integrity check failed."); zfree(encoded); o->ptr = NULL; @@ -2399,14 +2613,14 @@ robj *rdbLoadObject(int rdbtype, rio *rdb, sds key, int dbid, int *error) { } if (zsetLength(o) > server.zset_max_listpack_entries) - zsetConvert(o,OBJ_ENCODING_SKIPLIST); + zsetConvert(o, OBJ_ENCODING_SKIPLIST); else o->ptr = lpShrinkToFit(o->ptr); break; } case RDB_TYPE_ZSET_LISTPACK: if (deep_integrity_validation) server.stat_dump_payload_sanitizations++; - if (!lpValidateIntegrityAndDups(encoded, encoded_len, deep_integrity_validation, 1)) { + if (!lpValidateIntegrityAndDups(encoded, encoded_len, deep_integrity_validation, 2)) { rdbReportCorruptRDB("Zset listpack integrity check failed."); 
zfree(encoded); o->ptr = NULL; @@ -2421,7 +2635,7 @@ robj *rdbLoadObject(int rdbtype, rio *rdb, sds key, int dbid, int *error) { if (zsetLength(o) > server.zset_max_listpack_entries) - zsetConvert(o,OBJ_ENCODING_SKIPLIST); + zsetConvert(o, OBJ_ENCODING_SKIPLIST); break; case RDB_TYPE_HASH_ZIPLIST: { @@ -2439,35 +2653,54 @@ robj *rdbLoadObject(int rdbtype, rio *rdb, sds key, int dbid, int *error) { o->ptr = lp; o->type = OBJ_HASH; o->encoding = OBJ_ENCODING_LISTPACK; - if (hashTypeLength(o) == 0) { + if (hashTypeLength(o, 0) == 0) { decrRefCount(o); goto emptykey; } - if (hashTypeLength(o) > server.hash_max_listpack_entries) - hashTypeConvert(o, OBJ_ENCODING_HT); + if (hashTypeLength(o, 0) > server.hash_max_listpack_entries) + hashTypeConvert(o, OBJ_ENCODING_HT, NULL); else o->ptr = lpShrinkToFit(o->ptr); break; } case RDB_TYPE_HASH_LISTPACK: + case RDB_TYPE_HASH_LISTPACK_EX: + /* listpack-encoded hash with TTL requires its own struct + * pointed to by o->ptr */ + o->type = OBJ_HASH; + if (rdbtype == RDB_TYPE_HASH_LISTPACK_EX) { + listpackEx *lpt = listpackExCreate(); + lpt->lp = encoded; + lpt->key = key; + o->ptr = lpt; + o->encoding = OBJ_ENCODING_LISTPACK_EX; + } else + o->encoding = OBJ_ENCODING_LISTPACK; + + /* tuple_len is the number of elements for each key: + * key + value for simple hash, key + value + ttl for hash with TTL */ + int tuple_len = (rdbtype == RDB_TYPE_HASH_LISTPACK ? 
2 : 3); + /* validate read data */ if (deep_integrity_validation) server.stat_dump_payload_sanitizations++; - if (!lpValidateIntegrityAndDups(encoded, encoded_len, deep_integrity_validation, 1)) { + if (!lpValidateIntegrityAndDups(encoded, encoded_len, + deep_integrity_validation, tuple_len)) { rdbReportCorruptRDB("Hash listpack integrity check failed."); - zfree(encoded); - o->ptr = NULL; decrRefCount(o); return NULL; } - o->type = OBJ_HASH; - o->encoding = OBJ_ENCODING_LISTPACK; - if (hashTypeLength(o) == 0) { + + /* if listpack is empty, delete it */ + if (hashTypeLength(o, 0) == 0) { decrRefCount(o); goto emptykey; } - if (hashTypeLength(o) > server.hash_max_listpack_entries) - hashTypeConvert(o, OBJ_ENCODING_HT); + /* Convert listpack to hash table without registering in global HFE DS, + * if has HFEs, since the listpack is not connected yet to the DB */ + if (hashTypeLength(o, 0) > server.hash_max_listpack_entries) + hashTypeConvert(o, OBJ_ENCODING_HT, NULL /*db->hexpires*/); + break; default: /* totally unreachable */ @@ -2553,7 +2786,7 @@ robj *rdbLoadObject(int rdbtype, rio *rdb, sds key, int dbid, int *error) { /* Load the last entry ID. */ s->last_id.ms = rdbLoadLen(rdb,NULL); s->last_id.seq = rdbLoadLen(rdb,NULL); - + if (rdbtype >= RDB_TYPE_STREAM_LISTPACKS_2) { /* Load the first entry ID. */ s->first_id.ms = rdbLoadLen(rdb,NULL); @@ -2572,9 +2805,9 @@ robj *rdbLoadObject(int rdbtype, rio *rdb, sds key, int dbid, int *error) { s->max_deleted_entry_id.ms = 0; s->max_deleted_entry_id.seq = 0; s->entries_added = s->length; - + /* Since the rax is already loaded, we can find the first entry's - * ID. */ + * ID. 
*/ streamGetEdgeID(s,1,1,&s->first_id); } @@ -2740,13 +2973,14 @@ robj *rdbLoadObject(int rdbtype, rio *rdb, sds key, int dbid, int *error) { decrRefCount(o); return NULL; } - streamNACK *nack = raxFind(cgroup->pel,rawid,sizeof(rawid)); - if (nack == raxNotFound) { + void *result; + if (!raxFind(cgroup->pel,rawid,sizeof(rawid),&result)) { rdbReportCorruptRDB("Consumer entry not found in " "group global PEL"); decrRefCount(o); return NULL; } + streamNACK *nack = result; /* Set the NACK consumer, that was left to NULL when * loading the global PEL. Then set the same shared @@ -2819,7 +3053,7 @@ robj *rdbLoadObject(int rdbtype, rio *rdb, sds key, int dbid, int *error) { uint64_t eof = rdbLoadLen(rdb,NULL); if (eof == RDB_LENERR) { if (ptr) { - o = createModuleObject(mt,ptr); /* creating just in order to easily destroy */ + o = createModuleObject(mt, ptr); /* creating just in order to easily destroy */ decrRefCount(o); } return NULL; @@ -2828,7 +3062,7 @@ robj *rdbLoadObject(int rdbtype, rio *rdb, sds key, int dbid, int *error) { rdbReportCorruptRDB("The RDB file contains module data for the module '%s' that is not terminated by " "the proper module value EOF marker", moduleTypeModuleName(mt)); if (ptr) { - o = createModuleObject(mt,ptr); /* creating just in order to easily destroy */ + o = createModuleObject(mt, ptr); /* creating just in order to easily destroy */ decrRefCount(o); } return NULL; @@ -2840,11 +3074,12 @@ robj *rdbLoadObject(int rdbtype, rio *rdb, sds key, int dbid, int *error) { moduleTypeModuleName(mt)); return NULL; } - o = createModuleObject(mt,ptr); + o = createModuleObject(mt, ptr); } else { rdbReportReadError("Unknown RDB encoding type %d",rdbtype); return NULL; } + if (error) *error = 0; return o; @@ -2981,7 +3216,7 @@ int rdbFunctionLoad(rio *rdb, int ver, functionsLibCtx* lib_ctx, int rdbflags, s if (lib_ctx) { sds library_name = NULL; - if (!(library_name = functionsCreateWithLibraryCtx(final_payload, rdbflags & RDBFLAGS_ALLOW_DUP, &error, 
lib_ctx))) { + if (!(library_name = functionsCreateWithLibraryCtx(final_payload, rdbflags & RDBFLAGS_ALLOW_DUP, &error, lib_ctx, 0))) { if (!error) { error = sdsnew("Failed creating the library"); } @@ -3014,7 +3249,6 @@ int rdbLoadRio(rio *rdb, int rdbflags, rdbSaveInfo *rsi) { return retval; } - /* Load an RDB file from the rio stream 'rdb'. On success C_OK is returned, * otherwise C_ERR is returned. * The rdb_loading_ctx argument holds objects to which the rdb will be loaded to, @@ -3023,6 +3257,8 @@ int rdbLoadRio(rio *rdb, int rdbflags, rdbSaveInfo *rsi) { int rdbLoadRioWithLoadingCtx(rio *rdb, int rdbflags, rdbSaveInfo *rsi, rdbLoadingCtx *rdb_loading_ctx) { uint64_t dbid = 0; int type, rdbver; + uint64_t db_size = 0, expires_size = 0; + int should_expand_db = 0; redisDb *db = rdb_loading_ctx->dbarray+0; char buf[1024]; int error; @@ -3098,13 +3334,27 @@ int rdbLoadRioWithLoadingCtx(rio *rdb, int rdbflags, rdbSaveInfo *rsi, rdbLoadin } else if (type == RDB_OPCODE_RESIZEDB) { /* RESIZEDB: Hint about the size of the keys in the currently * selected data base, in order to avoid useless rehashing. */ - uint64_t db_size, expires_size; if ((db_size = rdbLoadLen(rdb,NULL)) == RDB_LENERR) goto eoferr; if ((expires_size = rdbLoadLen(rdb,NULL)) == RDB_LENERR) goto eoferr; - dictExpand(db->dict,db_size); - dictExpand(db->expires,expires_size); + should_expand_db = 1; + continue; /* Read next opcode. */ + } else if (type == RDB_OPCODE_SLOT_INFO) { + uint64_t slot_id, slot_size, expires_slot_size; + if ((slot_id = rdbLoadLen(rdb,NULL)) == RDB_LENERR) + goto eoferr; + if ((slot_size = rdbLoadLen(rdb,NULL)) == RDB_LENERR) + goto eoferr; + if ((expires_slot_size = rdbLoadLen(rdb,NULL)) == RDB_LENERR) + goto eoferr; + if (!server.cluster_enabled) { + continue; /* Ignore gracefully. */ + } + /* In cluster mode we resize individual slot specific dictionaries based on the number of keys that slot holds. 
*/ + kvstoreDictExpand(db->keys, slot_id, slot_size); + kvstoreDictExpand(db->expires, slot_id, expires_slot_size); + should_expand_db = 0; continue; /* Read next opcode. */ } else if (type == RDB_OPCODE_AUX) { /* AUX: generic string-string fields. Use to add state to RDB @@ -3234,6 +3484,14 @@ int rdbLoadRioWithLoadingCtx(rio *rdb, int rdbflags, rdbSaveInfo *rsi, rdbLoadin continue; } + /* If there is no slot info, it means that it's either not cluster mode or we are trying to load legacy RDB file. + * In this case we want to estimate number of keys per slot and resize accordingly. */ + if (should_expand_db) { + dbExpand(db, db_size, 0); + dbExpandExpires(db, expires_size, 0); + should_expand_db = 0; + } + /* Read key */ if ((key = rdbGenericLoadStringObject(rdb,RDB_LOAD_SDS,NULL)) == NULL) goto eoferr; @@ -3245,8 +3503,8 @@ int rdbLoadRioWithLoadingCtx(rio *rdb, int rdbflags, rdbSaveInfo *rsi, rdbLoadin * received from the master. In the latter case, the master is * responsible for key expiry. If we would expire keys here, the * snapshot taken by the master may not be reflected on the slave. - * Similarly, if the base AOF is RDB format, we want to load all - * the keys they are, since the log of operations in the incr AOF + * Similarly, if the base AOF is RDB format, we want to load all + * the keys they are, since the log of operations in the incr AOF * is assumed to work in the exact keyspace state. 
*/ if (val == NULL) { /* Since we used to have bug that could lead to empty keys @@ -3301,6 +3559,14 @@ int rdbLoadRioWithLoadingCtx(rio *rdb, int rdbflags, rdbSaveInfo *rsi, rdbLoadin } } + /* If minExpiredField was set, then the object is hash with expiration + * on fields and need to register it in global HFE DS */ + if (val->type == OBJ_HASH) { + uint64_t minExpiredField = hashTypeGetMinExpire(val, 1); + if (minExpiredField != EB_EXPIRE_TIME_INVALID) + hashTypeAddToExpires(db, key, val, minExpiredField); + } + /* Set the expire time if needed */ if (expiretime != -1) { setExpire(NULL,db,&keyobj,expiretime); @@ -3402,19 +3668,19 @@ int rdbLoad(char *filename, rdbSaveInfo *rsi, int rdbflags) { if (retval == C_OK && !(rdbflags & RDBFLAGS_KEEP_CACHE)) { /* TODO: maybe we could combine the fopen and open into one in the future */ rdb_fd = open(filename, O_RDONLY); - if (rdb_fd > 0) bioCreateCloseJob(rdb_fd, 0, 1); + if (rdb_fd >= 0) bioCreateCloseJob(rdb_fd, 0, 1); } return (retval==C_OK) ? RDB_OK : RDB_FAILED; } /* A background saving child (BGSAVE) terminated its work. Handle this. * This function covers the case of actual BGSAVEs. */ -static void backgroundSaveDoneHandlerDisk(int exitcode, int bysignal) { +static void backgroundSaveDoneHandlerDisk(int exitcode, int bysignal, time_t save_end) { if (!bysignal && exitcode == 0) { serverLog(LL_NOTICE, "Background saving terminated with success"); server.dirty = server.dirty - server.dirty_before_bgsave; - server.lastsave = time(NULL); + server.lastsave = save_end; server.lastbgsave_status = C_OK; } else if (!bysignal && exitcode != 0) { serverLog(LL_WARNING, "Background saving error"); @@ -3466,9 +3732,11 @@ static void backgroundSaveDoneHandlerSocket(int exitcode, int bysignal) { /* When a background RDB saving/transfer terminates, call the right handler. 
*/ void backgroundSaveDoneHandler(int exitcode, int bysignal) { int type = server.rdb_child_type; + time_t save_end = time(NULL); + switch(server.rdb_child_type) { case RDB_CHILD_TYPE_DISK: - backgroundSaveDoneHandlerDisk(exitcode,bysignal); + backgroundSaveDoneHandlerDisk(exitcode,bysignal,save_end); break; case RDB_CHILD_TYPE_SOCKET: backgroundSaveDoneHandlerSocket(exitcode,bysignal); @@ -3479,7 +3747,7 @@ void backgroundSaveDoneHandler(int exitcode, int bysignal) { } server.rdb_child_type = RDB_CHILD_TYPE_NONE; - server.rdb_save_time_last = time(NULL)-server.rdb_save_time_start; + server.rdb_save_time_last = save_end-server.rdb_save_time_start; server.rdb_save_time_start = -1; /* Possibly there are slaves waiting for a BGSAVE in order to be served * (the first stage of SYNC is a bulk transfer of dump.rdb) */ @@ -3598,6 +3866,7 @@ int rdbSaveToSlavesSockets(int req, rdbSaveInfo *rsi) { } close(rdb_pipe_write); close(server.rdb_pipe_read); + close(server.rdb_child_exit_pipe); zfree(server.rdb_pipe_conns); server.rdb_pipe_conns = NULL; server.rdb_pipe_numconns = 0; diff --git a/src/rdb.h b/src/rdb.h index 234bde221be..65da1932239 100644 --- a/src/rdb.h +++ b/src/rdb.h @@ -1,30 +1,9 @@ /* - * Copyright (c) 2009-2012, Salvatore Sanfilippo + * Copyright (c) 2009-Present, Redis Ltd. * All rights reserved. * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. 
- * * Neither the name of Redis nor the names of its contributors may be used - * to endorse or promote products derived from this software without - * specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. + * Licensed under your choice of the Redis Source Available License 2.0 + * (RSALv2) or the Server Side Public License v1 (SSPLv1). */ #ifndef __RDB_H @@ -38,7 +17,7 @@ /* The current RDB version. When the format changes in a way that is no longer * backward compatible this number gets incremented. */ -#define RDB_VERSION 11 +#define RDB_VERSION 12 /* Defines related to the dump file format. To store 32 bits lengths for short * keys requires a lot of space, so we check the most significant 2 bits of @@ -81,9 +60,6 @@ #define RDB_TYPE_MODULE_PRE_GA 6 /* Used in 4.0 release candidates */ #define RDB_TYPE_MODULE_2 7 /* Module value with annotations for parsing without the generating module being loaded. */ -/* NOTE: WHEN ADDING NEW RDB TYPE, UPDATE rdbIsObjectType() BELOW */ - -/* Object types for encoded objects. 
*/ #define RDB_TYPE_HASH_ZIPMAP 9 #define RDB_TYPE_LIST_ZIPLIST 10 #define RDB_TYPE_SET_INTSET 11 @@ -97,12 +73,15 @@ #define RDB_TYPE_STREAM_LISTPACKS_2 19 #define RDB_TYPE_SET_LISTPACK 20 #define RDB_TYPE_STREAM_LISTPACKS_3 21 -/* NOTE: WHEN ADDING NEW RDB TYPE, UPDATE rdbIsObjectType() BELOW */ +#define RDB_TYPE_HASH_METADATA 22 +#define RDB_TYPE_HASH_LISTPACK_EX 23 +/* NOTE: WHEN ADDING NEW RDB TYPE, UPDATE rdbIsObjectType(), and rdb_type_string[] */ /* Test if a type is an object type. */ -#define rdbIsObjectType(t) (((t) >= 0 && (t) <= 7) || ((t) >= 9 && (t) <= 21)) +#define rdbIsObjectType(t) (((t) >= 0 && (t) <= 7) || ((t) >= 9 && (t) <= 23)) /* Special RDB opcodes (saved/loaded with rdbSaveType/rdbLoadType). */ +#define RDB_OPCODE_SLOT_INFO 244 /* Individual slot info, such as slot id and size (cluster mode only). */ #define RDB_OPCODE_FUNCTION2 245 /* function library data */ #define RDB_OPCODE_FUNCTION_PRE_GA 246 /* old function library data for 7.0 rc1 and rc2 */ #define RDB_OPCODE_MODULE_AUX 247 /* Module auxiliary data. */ @@ -124,13 +103,15 @@ #define RDB_MODULE_OPCODE_STRING 5 /* String. */ /* rdbLoad...() functions flags. */ -#define RDB_LOAD_NONE 0 -#define RDB_LOAD_ENC (1<<0) -#define RDB_LOAD_PLAIN (1<<1) -#define RDB_LOAD_SDS (1<<2) +#define RDB_LOAD_NONE 0 +#define RDB_LOAD_ENC (1<<0) +#define RDB_LOAD_PLAIN (1<<1) +#define RDB_LOAD_SDS (1<<2) +#define RDB_LOAD_HFLD (1<<3) +#define RDB_LOAD_HFLD_TTL (1<<4) /* flags on the purpose of rdb save or load */ -#define RDBFLAGS_NONE 0 /* No special RDB loading. */ +#define RDBFLAGS_NONE 0 /* No special RDB loading or saving. */ #define RDBFLAGS_AOF_PREAMBLE (1<<0) /* Load/save the RDB as AOF preamble. */ #define RDBFLAGS_REPLICATION (1<<1) /* Load/save for SYNC. 
*/ #define RDBFLAGS_ALLOW_DUP (1<<2) /* Allow duplicated keys when loading.*/ @@ -139,15 +120,15 @@ /* When rdbLoadObject() returns NULL, the err flag is * set to hold the type of error that occurred */ -#define RDB_LOAD_ERR_EMPTY_KEY 1 /* Error of empty key */ -#define RDB_LOAD_ERR_OTHER 2 /* Any other errors */ +#define RDB_LOAD_ERR_EMPTY_KEY 1 /* Error of empty key */ +#define RDB_LOAD_ERR_OTHER 2 /* Any other errors */ ssize_t rdbWriteRaw(rio *rdb, void *p, size_t len); int rdbSaveType(rio *rdb, unsigned char type); int rdbLoadType(rio *rdb); time_t rdbLoadTime(rio *rdb); int rdbSaveLen(rio *rdb, uint64_t len); -int rdbSaveMillisecondTime(rio *rdb, long long t); +ssize_t rdbSaveMillisecondTime(rio *rdb, long long t); long long rdbLoadMillisecondTime(rio *rdb, int rdbver); uint64_t rdbLoadLen(rio *rdb, int *isencoded); int rdbLoadLenByRef(rio *rdb, int *isencoded, uint64_t *lenptr); diff --git a/src/redis-benchmark.c b/src/redis-benchmark.c index 1ee562f20ca..94151ba8af9 100644 --- a/src/redis-benchmark.c +++ b/src/redis-benchmark.c @@ -1,35 +1,13 @@ /* Redis benchmark utility. * - * Copyright (c) 2009-2012, Salvatore Sanfilippo + * Copyright (c) 2009-Present, Redis Ltd. * All rights reserved. * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Redis nor the names of its contributors may be used - * to endorse or promote products derived from this software without - * specific prior written permission. 
- * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. + * Licensed under your choice of the Redis Source Available License 2.0 + * (RSALv2) or the Server Side Public License v1 (SSPLv1). */ #include "fmacros.h" -#include "version.h" #include #include @@ -167,7 +145,6 @@ typedef struct clusterNode { sds replicate; /* Master ID if node is a slave */ int *slots; int slots_count; - int current_slot_index; int *updated_slots; /* Used by updateClusterSlotsConfiguration */ int updated_slots_count; /* Used by updateClusterSlotsConfiguration */ int replicas_count; @@ -186,8 +163,6 @@ typedef struct redisConfig { } redisConfig; /* Prototypes */ -char *redisGitSHA1(void); -char *redisGitDirty(void); static void writeHandler(aeEventLoop *el, int fd, void *privdata, int mask); static void createMissingClients(client c); static benchmarkThread *createBenchmarkThread(int index); @@ -205,20 +180,6 @@ static void updateClusterSlotsConfiguration(void); int showThroughput(struct aeEventLoop *eventLoop, long long id, void *clientData); -static sds benchmarkVersion(void) { - sds version; - version = sdscatprintf(sdsempty(), "%s", REDIS_VERSION); - - /* Add git commit and working tree status when available */ - if (strtoll(redisGitSHA1(),NULL,16)) { - 
version = sdscatprintf(version, " (git:%s", redisGitSHA1()); - if (strtoll(redisGitDirty(),NULL,10)) - version = sdscatprintf(version, "-dirty"); - version = sdscat(version, ")"); - } - return version; -} - /* Dict callbacks */ static uint64_t dictSdsHash(const void *key); static int dictSdsKeyCompare(dict *d, const void *key1, const void *key2); @@ -434,7 +395,6 @@ static void setClusterKeyHashTag(client c) { assert(c->thread_id >= 0); clusterNode *node = c->cluster_node; assert(node); - assert(node->current_slot_index < node->slots_count); int is_updating_slots = 0; atomicGet(config.is_updating_slots, is_updating_slots); /* If updateClusterSlotsConfiguration is updating the slots array, @@ -444,7 +404,7 @@ static void setClusterKeyHashTag(client c) { * updateClusterSlotsConfiguration won't actually do anything, since * the updated_slots_count array will be already NULL. */ if (is_updating_slots) updateClusterSlotsConfiguration(); - int slot = node->slots[node->current_slot_index]; + int slot = node->slots[rand() % node->slots_count]; const char *tag = crc16_slot_table[slot]; int taglen = strlen(tag); size_t i; @@ -1064,7 +1024,6 @@ static clusterNode *createClusterNode(char *ip, int port) { node->replicas_count = 0; node->slots = zmalloc(CLUSTER_SLOTS * sizeof(int)); node->slots_count = 0; - node->current_slot_index = 0; node->updated_slots = NULL; node->updated_slots_count = 0; node->migrating = NULL; @@ -1387,7 +1346,6 @@ static void updateClusterSlotsConfiguration(void) { int *oldslots = node->slots; node->slots = node->updated_slots; node->slots_count = node->updated_slots_count; - node->current_slot_index = 0; node->updated_slots = NULL; node->updated_slots_count = 0; zfree(oldslots); @@ -1423,7 +1381,7 @@ int parseOptions(int argc, char **argv) { if (lastarg) goto invalid; config.numclients = atoi(argv[++i]); } else if (!strcmp(argv[i],"-v") || !strcmp(argv[i], "--version")) { - sds version = benchmarkVersion(); + sds version = cliVersion(); 
printf("redis-benchmark %s\n", version); sdsfree(version); exit(0); @@ -1613,7 +1571,10 @@ int parseOptions(int argc, char **argv) { " -s Server socket (overrides host and port)\n" " -a Password for Redis Auth\n" " --user Used to send ACL style 'AUTH username pass'. Needs -a.\n" -" -u Server URI.\n" +" -u Server URI on format redis://user:password@host:port/dbnum\n" +" User, password and dbnum are optional. For authentication\n" +" without a username, use username 'default'. For TLS, use\n" +" the scheme 'rediss'.\n" " -c Number of parallel connections (default 50).\n" " Note: If --cluster is used then number of clients has to be\n" " the same or higher than the number of nodes.\n" @@ -1888,8 +1849,12 @@ int main(int argc, char **argv) { sds_args[argc] = readArgFromStdin(); argc++; } + /* Setup argument length */ + size_t *argvlen = zmalloc(argc*sizeof(size_t)); + for (i = 0; i < argc; i++) + argvlen[i] = sdslen(sds_args[i]); do { - len = redisFormatCommandArgv(&cmd,argc,(const char**)sds_args,NULL); + len = redisFormatCommandArgv(&cmd,argc,(const char**)sds_args,argvlen); // adjust the datasize to the parsed command config.datasize = len; benchmark(title,cmd,len); @@ -1899,6 +1864,7 @@ int main(int argc, char **argv) { sdsfree(title); if (config.redis_config != NULL) freeRedisConfig(config.redis_config); + zfree(argvlen); return 0; } diff --git a/src/redis-check-aof.c b/src/redis-check-aof.c index 616177a8b7a..56298387e26 100644 --- a/src/redis-check-aof.c +++ b/src/redis-check-aof.c @@ -1,6 +1,6 @@ /* * Copyright (c) 2009-2012, Pieter Noordhuis - * Copyright (c) 2009-2012, Salvatore Sanfilippo + * Copyright (c) 2009-current, Redis Ltd. * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -29,6 +29,7 @@ */ #include "server.h" + #include #include #include @@ -233,11 +234,13 @@ int checkSingleAof(char *aof_filename, char *aof_filepath, int last_file, int fi struct redis_stat sb; if (redis_fstat(fileno(fp),&sb) == -1) { printf("Cannot stat file: %s, aborting...\n", aof_filename); + fclose(fp); exit(1); } off_t size = sb.st_size; if (size == 0) { + fclose(fp); return AOF_CHECK_EMPTY; } @@ -343,6 +346,7 @@ int fileIsRDB(char *filepath) { struct redis_stat sb; if (redis_fstat(fileno(fp), &sb) == -1) { printf("Cannot stat file: %s\n", filepath); + fclose(fp); exit(1); } @@ -379,6 +383,7 @@ int fileIsManifest(char *filepath) { struct redis_stat sb; if (redis_fstat(fileno(fp), &sb) == -1) { printf("Cannot stat file: %s\n", filepath); + fclose(fp); exit(1); } @@ -395,15 +400,20 @@ int fileIsManifest(char *filepath) { break; } else { printf("Cannot read file: %s\n", filepath); + fclose(fp); exit(1); } } - /* Skip comments lines */ + /* We will skip comments lines. + * At present, the manifest format is fixed, see aofInfoFormat. + * We will break directly as long as it encounters other items. */ if (buf[0] == '#') { continue; } else if (!memcmp(buf, "file", strlen("file"))) { is_manifest = 1; + } else { + break; } } @@ -514,6 +524,13 @@ int redis_check_aof_main(int argc, char **argv) { if (argc < 2) { goto invalid_args; } else if (argc == 2) { + if (!strcmp(argv[1], "-v") || !strcmp(argv[1], "--version")) { + sds version = getVersion(); + printf("redis-check-aof %s\n", version); + sdsfree(version); + exit(0); + } + filepath = argv[1]; } else if (argc == 3) { if (!strcmp(argv[1], "--fix")) { diff --git a/src/redis-check-rdb.c b/src/redis-check-rdb.c index 682135e55f2..090c1bd4433 100644 --- a/src/redis-check-rdb.c +++ b/src/redis-check-rdb.c @@ -1,30 +1,9 @@ /* - * Copyright (c) 2016, Salvatore Sanfilippo + * Copyright (c) 2016-Present, Redis Ltd. * All rights reserved. 
* - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Redis nor the names of its contributors may be used - * to endorse or promote products derived from this software without - * specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. + * Licensed under your choice of the Redis Source Available License 2.0 + * (RSALv2) or the Server Side Public License v1 (SSPLv1). 
*/ #include "mt19937-64.h" @@ -98,7 +77,11 @@ char *rdb_type_string[] = { "hash-listpack", "zset-listpack", "quicklist-v2", + "stream-v2", "set-listpack", + "stream-v3", + "hash-hashtable-md", + "hash-listpack-md", }; /* Show a few stats collected into 'rdbstate' */ @@ -276,6 +259,15 @@ int redis_check_rdb(char *rdbfilename, FILE *fp) { if ((expires_size = rdbLoadLen(&rdb,NULL)) == RDB_LENERR) goto eoferr; continue; /* Read type again. */ + } else if (type == RDB_OPCODE_SLOT_INFO) { + uint64_t slot_id, slot_size, expires_slot_size; + if ((slot_id = rdbLoadLen(&rdb,NULL)) == RDB_LENERR) + goto eoferr; + if ((slot_size = rdbLoadLen(&rdb,NULL)) == RDB_LENERR) + goto eoferr; + if ((expires_slot_size = rdbLoadLen(&rdb,NULL)) == RDB_LENERR) + goto eoferr; + continue; /* Read type again. */ } else if (type == RDB_OPCODE_AUX) { /* AUX: generic string-string fields. Use to add state to RDB * which is backward compatible. Implementations of RDB loading @@ -341,7 +333,8 @@ int redis_check_rdb(char *rdbfilename, FILE *fp) { rdbstate.keys++; /* Read value */ rdbstate.doing = RDB_CHECK_DOING_READ_OBJECT_VALUE; - if ((val = rdbLoadObject(type,&rdb,key->ptr,selected_dbid,NULL)) == NULL) goto eoferr; + if ((val = rdbLoadObject(type,&rdb,key->ptr,selected_dbid,NULL)) == NULL) + goto eoferr; /* Check if the key already expired. 
*/ if (expiretime != -1 && expiretime < now) rdbstate.already_expired++; @@ -385,20 +378,6 @@ int redis_check_rdb(char *rdbfilename, FILE *fp) { return 1; } -static sds checkRdbVersion(void) { - sds version; - version = sdscatprintf(sdsempty(), "%s", REDIS_VERSION); - - /* Add git commit and working tree status when available */ - if (strtoll(redisGitSHA1(),NULL,16)) { - version = sdscatprintf(version, " (git:%s", redisGitSHA1()); - if (strtoll(redisGitDirty(),NULL,10)) - version = sdscatprintf(version, "-dirty"); - version = sdscat(version, ")"); - } - return version; -} - /* RDB check main: called form server.c when Redis is executed with the * redis-check-rdb alias, on during RDB loading errors. * @@ -418,7 +397,7 @@ int redis_check_rdb_main(int argc, char **argv, FILE *fp) { fprintf(stderr, "Usage: %s \n", argv[0]); exit(1); } else if (!strcmp(argv[1],"-v") || !strcmp(argv[1], "--version")) { - sds version = checkRdbVersion(); + sds version = getVersion(); printf("redis-check-rdb %s\n", version); sdsfree(version); exit(0); diff --git a/src/redis-cli.c b/src/redis-cli.c index de34965b48e..2590fb182ba 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -1,36 +1,15 @@ /* Redis CLI (command line interface) * - * Copyright (c) 2009-2012, Salvatore Sanfilippo + * Copyright (c) 2009-Present, Redis Ltd. * All rights reserved. * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. 
- * * Neither the name of Redis nor the names of its contributors may be used - * to endorse or promote products derived from this software without - * specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. + * Licensed under your choice of the Redis Source Available License 2.0 + * (RSALv2) or the Server Side Public License v1 (SSPLv1). */ #include "fmacros.h" -#include "version.h" +#include #include #include #include @@ -64,8 +43,8 @@ #include "connection.h" #include "cli_common.h" #include "mt19937-64.h" - #include "cli_commands.h" +#include "hdr_histogram.h" #define UNUSED(V) ((void) V) @@ -163,6 +142,10 @@ /* DNS lookup */ #define NET_IP_STR_LEN 46 /* INET6_ADDRSTRLEN is 46 */ +#define REFRESH_INTERVAL 300 /* milliseconds */ + +#define IS_TTY_OR_FAKETTY() (isatty(STDOUT_FILENO) || getenv("FAKETTY")) + /* --latency-dist palettes. 
*/ int spectrum_palette_color_size = 19; int spectrum_palette_color[] = {0,233,234,235,237,239,241,243,245,247,144,143,142,184,226,214,208,202,196}; @@ -213,6 +196,7 @@ static int createClusterManagerCommand(char *cmdname, int argc, char **argv); static redisContext *context; static struct config { cliConnInfo conn_info; + struct timeval connect_timeout; char *hostsocket; int tls; cliSSLconfig sslconfig; @@ -246,8 +230,11 @@ static struct config { char *rdb_filename; int bigkeys; int memkeys; - unsigned memkeys_samples; + long long memkeys_samples; int hotkeys; + int keystats; + unsigned long long cursor; + unsigned long top_sizes_limit; int stdin_lastarg; /* get last arg from stdin. (-x option) */ int stdin_tag_arg; /* get arg from stdin. (-X option) */ char *stdin_tag_name; /* Placeholder(tag name) for user input. */ @@ -277,6 +264,8 @@ static struct config { char *server_version; char *test_hint; char *test_hint_file; + int prefer_ipv4; /* Prefer IPv4 over IPv6 on DNS lookup. */ + int prefer_ipv6; /* Prefer IPv6 over IPv4 on DNS lookup. */ } config; /* User preferences. */ @@ -287,8 +276,6 @@ static struct pref { static volatile sig_atomic_t force_cancel_loop = 0; static void usage(int err); static void slaveMode(int send_sync); -char *redisGitSHA1(void); -char *redisGitDirty(void); static int cliConnect(int flags); static char *getInfoField(char *info, char *field); @@ -404,6 +391,37 @@ void dictListDestructor(dict *d, void *val) listRelease((list*)val); } +/* Erase the lines before printing, and returns the number of lines printed */ +int cleanPrintfln(char *fmt, ...) 
{ + va_list args; + char buf[1024]; /* limitation */ + int char_count, line_count = 0; + + /* Clear the line if in TTY */ + if (IS_TTY_OR_FAKETTY()) { + printf("\033[2K\r"); + } + + va_start(args, fmt); + char_count = vsnprintf(buf, sizeof(buf), fmt, args); + va_end(args); + + if (char_count >= (int)sizeof(buf)) { + fprintf(stderr, "Warning: String was trimmed in cleanPrintln\n"); + } + + char *position, *string = buf; + while ((position = strchr(string, '\n')) != NULL) { + int line_length = (int)(position - string); + printf("%.*s\n", line_length, string); + string = position + 1; + line_count++; + } + + printf("%s\n", string); + return line_count + 1; +} + /*------------------------------------------------------------------------------ * Help functions *--------------------------------------------------------------------------- */ @@ -424,20 +442,6 @@ typedef struct { static helpEntry *helpEntries = NULL; static int helpEntriesLen = 0; -static sds cliVersion(void) { - sds version; - version = sdscatprintf(sdsempty(), "%s", REDIS_VERSION); - - /* Add git commit and working tree status when available */ - if (strtoll(redisGitSHA1(),NULL,16)) { - version = sdscatprintf(version, " (git:%s", redisGitSHA1()); - if (strtoll(redisGitDirty(),NULL,10)) - version = sdscatprintf(version, "-dirty"); - version = sdscat(version, ")"); - } - return version; -} - /* For backwards compatibility with pre-7.0 servers. * cliLegacyInitHelp() sets up the helpEntries array with the command and group * names from the commands.c file. 
However the Redis instance we are connecting @@ -760,8 +764,13 @@ static int versionIsSupported(sds version, sds since) { } versionPos = strchr(versionPos, '.'); sincePos = strchr(sincePos, '.'); - if (!versionPos || !sincePos) - return 0; + + /* If we finished to parse both `version` and `since`, it means they are equal */ + if (!versionPos && !sincePos) return 1; + + /* Different number of digits considered as not supported */ + if (!versionPos || !sincePos) return 0; + versionPos++; sincePos++; } @@ -778,7 +787,7 @@ static void removeUnsupportedArgs(struct cliCommandArg *args, int *numargs, sds i++; continue; } - for (j = i; j != *numargs; j++) { + for (j = i; j != *numargs - 1; j++) { args[j] = args[j + 1]; } (*numargs)--; @@ -1262,7 +1271,7 @@ static int matchNoTokenArg(char **nextword, int numwords, cliCommandArg *arg) { case ARG_TYPE_INTEGER: case ARG_TYPE_UNIX_TIME: { long long value; - if (sscanf(*nextword, "%lld", &value)) { + if (sscanf(*nextword, "%lld", &value) == 1) { arg->matched += 1; arg->matched_name = 1; arg->matched_all = 1; @@ -1276,7 +1285,7 @@ static int matchNoTokenArg(char **nextword, int numwords, cliCommandArg *arg) { case ARG_TYPE_DOUBLE: { double value; - if (sscanf(*nextword, "%lf", &value)) { + if (sscanf(*nextword, "%lf", &value) == 1) { arg->matched += 1; arg->matched_name = 1; arg->matched_all = 1; @@ -1657,15 +1666,17 @@ static int cliConnect(int flags) { redisFree(context); config.dbnum = 0; config.in_multi = 0; + config.pubsub_mode = 0; cliRefreshPrompt(); } /* Do not use hostsocket when we got redirected in cluster mode */ if (config.hostsocket == NULL || (config.cluster_mode && config.cluster_reissue_command)) { - context = redisConnect(config.conn_info.hostip,config.conn_info.hostport); + context = redisConnectWrapper(config.conn_info.hostip, config.conn_info.hostport, + config.connect_timeout); } else { - context = redisConnectUnix(config.hostsocket); + context = redisConnectUnixWrapper(config.hostsocket, 
config.connect_timeout); } if (!context->err && config.tls) { @@ -2291,8 +2302,12 @@ static int cliReadReply(int output_raw_strings) { slot = atoi(s+1); s = strrchr(p+1,':'); /* MOVED 3999[P]127.0.0.1[S]6381 */ *s = '\0'; - sdsfree(config.conn_info.hostip); - config.conn_info.hostip = sdsnew(p+1); + if (p+1 != s) { + /* Host might be empty, like 'MOVED 3999 :6381', if endpoint type is unknown. Only update the + * host if it's non-empty. */ + sdsfree(config.conn_info.hostip); + config.conn_info.hostip = sdsnew(p+1); + } config.conn_info.hostport = atoi(s+1); if (config.interactive) printf("-> Redirected to slot [%d] located at %s:%d\n", @@ -2604,7 +2619,8 @@ static redisReply *reconnectingRedisCommand(redisContext *c, const char *fmt, .. fflush(stdout); redisFree(c); - c = redisConnect(config.conn_info.hostip,config.conn_info.hostport); + c = redisConnectWrapper(config.conn_info.hostip, config.conn_info.hostport, + config.connect_timeout); if (!c->err && config.tls) { const char *err = NULL; if (cliSecureConnection(c, config.sslconfig, &err) == REDIS_ERR && err) { @@ -2659,6 +2675,15 @@ static int parseOptions(int argc, char **argv) { fprintf(stderr, "Invalid server port.\n"); exit(1); } + } else if (!strcmp(argv[i],"-t") && !lastarg) { + char *eptr; + double seconds = strtod(argv[++i], &eptr); + if (eptr[0] != '\0' || isnan(seconds) || seconds < 0.0) { + fprintf(stderr, "Invalid connection timeout for -t.\n"); + exit(1); + } + config.connect_timeout.tv_sec = (long long)seconds; + config.connect_timeout.tv_usec = ((long long)(seconds * 1000000)) % 1000000; } else if (!strcmp(argv[i],"-s") && !lastarg) { config.hostsocket = argv[++i]; } else if (!strcmp(argv[i],"-r") && !lastarg) { @@ -2754,12 +2779,63 @@ static int parseOptions(int argc, char **argv) { config.bigkeys = 1; } else if (!strcmp(argv[i],"--memkeys")) { config.memkeys = 1; - config.memkeys_samples = 0; /* use redis default */ + config.memkeys_samples = -1; /* use redis default */ } else if 
(!strcmp(argv[i],"--memkeys-samples") && !lastarg) { + char *endptr; config.memkeys = 1; - config.memkeys_samples = atoi(argv[++i]); + config.keystats = 1; + config.memkeys_samples = strtoll(argv[++i], &endptr, 10); + if (*endptr) { + fprintf(stderr, "--memkeys-samples conversion error.\n"); + exit(1); + } + if (config.memkeys_samples < 0) { + fprintf(stderr, "--memkeys-samples value should be positive.\n"); + exit(1); + } } else if (!strcmp(argv[i],"--hotkeys")) { config.hotkeys = 1; + } else if (!strcmp(argv[i], "--keystats")) { + config.keystats = 1; + config.memkeys_samples = -1; /* use redis default */ + } else if (!strcmp(argv[i],"--keystats-samples") && !lastarg) { + char *endptr; + config.keystats = 1; + config.memkeys_samples = strtoll(argv[++i], &endptr, 10); + if (*endptr) { + fprintf(stderr, "--keystats-samples conversion error.\n"); + exit(1); + } + if (config.memkeys_samples < 0) { + fprintf(stderr, "--keystats-samples value should be positive.\n"); + exit(1); + } + } else if (!strcmp(argv[i],"--cursor") && !lastarg) { + i++; + char sign = *argv[i]; + char *endptr; + config.cursor = strtoull(argv[i], &endptr, 10); + if (*endptr) { + fprintf(stderr, "--cursor conversion error.\n"); + exit(1); + } + if (sign == '-' && config.cursor != 0) { + fprintf(stderr, "--cursor should be followed by a positive integer.\n"); + exit(1); + } + } else if (!strcmp(argv[i],"--top") && !lastarg) { + i++; + char sign = *argv[i]; + char *endptr; + config.top_sizes_limit = strtoull(argv[i], &endptr, 10); + if (*endptr) { + fprintf(stderr, "--top conversion error.\n"); + exit(1); + } + if (sign == '-' && config.top_sizes_limit != 0) { + fprintf(stderr, "--top should be followed by a positive integer.\n"); + exit(1); + } } else if (!strcmp(argv[i],"--eval") && !lastarg) { config.eval = argv[++i]; } else if (!strcmp(argv[i],"--ldb")) { @@ -2781,6 +2857,10 @@ static int parseOptions(int argc, char **argv) { config.set_errcode = 1; } else if (!strcmp(argv[i],"--verbose")) { 
config.verbose = 1; + } else if (!strcmp(argv[i],"-4")) { + config.prefer_ipv4 = 1; + } else if (!strcmp(argv[i],"-6")) { + config.prefer_ipv6 = 1; } else if (!strcmp(argv[i],"--cluster") && !lastarg) { if (CLUSTER_MANAGER_MODE()) usage(1); char *cmd = argv[++i]; @@ -2965,6 +3045,11 @@ static int parseOptions(int argc, char **argv) { exit(1); } + if (config.prefer_ipv4 && config.prefer_ipv6) { + fprintf(stderr, "Options -4 and -6 are mutually exclusive.\n"); + exit(1); + } + return i; } @@ -3013,6 +3098,8 @@ static void usage(int err) { "Usage: redis-cli [OPTIONS] [cmd [arg [arg ...]]]\n" " -h Server hostname (default: 127.0.0.1).\n" " -p Server port (default: 6379).\n" +" -t Server connection timeout in seconds (decimals allowed).\n" +" Default timeout is 0, meaning no limit, depending on the OS.\n" " -s Server socket (overrides hostname and port).\n" " -a Password to use when connecting to the server.\n" " You can also use the " REDIS_CLI_AUTH_ENV " environment\n" @@ -3023,7 +3110,10 @@ static void usage(int err) { " --askpass Force user to input password with mask from STDIN.\n" " If this argument is used, '-a' and " REDIS_CLI_AUTH_ENV "\n" " environment variable will be ignored.\n" -" -u Server URI.\n" +" -u Server URI on format redis://user:password@host:port/dbnum\n" +" User, password and dbnum are optional. For authentication\n" +" without a username, use username 'default'. 
For TLS, use\n" +" the scheme 'rediss'.\n" " -r Execute specified command N times.\n" " -i When -r is used, waits seconds per command.\n" " It is possible to specify sub-second times like -i 0.1.\n" @@ -3038,6 +3128,8 @@ static void usage(int err) { " -D Delimiter between responses for raw formatting (default: \\n).\n" " -c Enable cluster mode (follow -ASK and -MOVED redirections).\n" " -e Return exit error code when command execution fails.\n" +" -4 Prefer IPv4 over IPv6 on DNS lookup.\n" +" -6 Prefer IPv6 over IPv4 on DNS lookup.\n" "%s" " --raw Use raw formatting for replies (default when STDOUT is\n" " not a tty).\n" @@ -3079,6 +3171,13 @@ version,tls_usage); " --memkeys Sample Redis keys looking for keys consuming a lot of memory.\n" " --memkeys-samples Sample Redis keys looking for keys consuming a lot of memory.\n" " And define number of key elements to sample\n" +" --keystats Sample Redis keys looking for keys memory size and length (combine bigkeys and memkeys).\n" +" --keystats-samples Sample Redis keys looking for keys memory size and length.\n" +" And define number of key elements to sample (only for memory usage).\n" +" --cursor Start the scan at the cursor (usually after a Ctrl-C).\n" +" Optionally used with --keystats and --keystats-samples.\n" +" --top To display top key sizes (default: 10).\n" +" Optionally used with --keystats and --keystats-samples.\n" " --hotkeys Sample Redis keys looking for hot keys.\n" " only works when maxmemory-policy is *lfu.\n" " --scan List all keys using the SCAN command.\n" @@ -3108,6 +3207,7 @@ version,tls_usage); " Use --cluster help to list all available cluster manager commands.\n" "\n" "Examples:\n" +" redis-cli -u redis://default:PASSWORD@localhost:6379/0\n" " cat /etc/passwd | redis-cli -x set mypasswd\n" " redis-cli -D \"\" --raw dump key > key.dump && redis-cli -X dump_tag restore key2 0 dump_tag replace < key.dump\n" " redis-cli -r 100 lpush mylist x\n" @@ -3257,16 +3357,20 @@ void cliLoadPreferences(void) { 
/* Some commands can include sensitive information and shouldn't be put in the * history file. Currently these commands are include: * - AUTH - * - ACL SETUSER - * - CONFIG SET masterauth/masteruser/requirepass + * - ACL DELUSER, ACL SETUSER, ACL GETUSER + * - CONFIG SET masterauth/masteruser/tls-key-file-pass/tls-client-key-file-pass/requirepass * - HELLO with [AUTH username password] - * - MIGRATE with [AUTH password] or [AUTH2 username password] */ + * - MIGRATE with [AUTH password] or [AUTH2 username password] + * - SENTINEL CONFIG SET sentinel-pass password, SENTINEL CONFIG SET sentinel-user username + * - SENTINEL SET auth-pass password, SENTINEL SET auth-user username */ static int isSensitiveCommand(int argc, char **argv) { if (!strcasecmp(argv[0],"auth")) { return 1; } else if (argc > 1 && - !strcasecmp(argv[0],"acl") && - !strcasecmp(argv[1],"setuser")) + !strcasecmp(argv[0],"acl") && ( + !strcasecmp(argv[1],"deluser") || + !strcasecmp(argv[1],"setuser") || + !strcasecmp(argv[1],"getuser"))) { return 1; } else if (argc > 2 && @@ -3274,8 +3378,10 @@ static int isSensitiveCommand(int argc, char **argv) { !strcasecmp(argv[1],"set")) { for (int j = 2; j < argc; j = j+2) { if (!strcasecmp(argv[j],"masterauth") || - !strcasecmp(argv[j],"masteruser") || - !strcasecmp(argv[j],"requirepass")) { + !strcasecmp(argv[j],"masteruser") || + !strcasecmp(argv[j],"tls-key-file-pass") || + !strcasecmp(argv[j],"tls-client-key-file-pass") || + !strcasecmp(argv[j],"requirepass")) { return 1; } } @@ -3305,6 +3411,24 @@ static int isSensitiveCommand(int argc, char **argv) { return 0; } } + } else if (argc > 4 && !strcasecmp(argv[0], "sentinel")) { + /* SENTINEL CONFIG SET sentinel-pass password + * SENTINEL CONFIG SET sentinel-user username */ + if (!strcasecmp(argv[1], "config") && + !strcasecmp(argv[2], "set") && + (!strcasecmp(argv[3], "sentinel-pass") || + !strcasecmp(argv[3], "sentinel-user"))) + { + return 1; + } + /* SENTINEL SET auth-pass password + * SENTINEL SET 
auth-user username */ + if (!strcasecmp(argv[1], "set") && + (!strcasecmp(argv[3], "auth-pass") || + !strcasecmp(argv[3], "auth-user"))) + { + return 1; + } } return 0; } @@ -3331,7 +3455,7 @@ static void repl(void) { linenoiseSetFreeHintsCallback(freeHintsCallback); /* Only use history and load the rc file when stdin is a tty. */ - if (isatty(fileno(stdin))) { + if (getenv("FAKETTY_WITH_PROMPT") != NULL || isatty(fileno(stdin))) { historyfile = getDotfilePath(REDIS_CLI_HISTFILE_ENV,REDIS_CLI_HISTFILE_DEFAULT); //keep in-memory history always regardless if history file can be determined history = 1; @@ -3361,7 +3485,7 @@ static void repl(void) { if (argv == NULL) { printf("Invalid argument(s)\n"); fflush(stdout); - if (history) linenoiseHistoryAdd(line); + if (history) linenoiseHistoryAdd(line, 0); if (historyfile) linenoiseHistorySave(historyfile); linenoiseFree(line); continue; @@ -3387,10 +3511,11 @@ static void repl(void) { repeat = 1; } - if (!isSensitiveCommand(argc - skipargs, argv + skipargs)) { - if (history) linenoiseHistoryAdd(line); - if (historyfile) linenoiseHistorySave(historyfile); - } + /* Always keep in-memory history. But for commands with sensitive information, + * avoid writing them to the history file. */ + int is_sensitive = isSensitiveCommand(argc - skipargs, argv + skipargs); + if (history) linenoiseHistoryAdd(line, is_sensitive); + if (!is_sensitive && historyfile) linenoiseHistorySave(historyfile); if (strcasecmp(argv[0],"quit") == 0 || strcasecmp(argv[0],"exit") == 0) @@ -3736,7 +3861,7 @@ typedef struct clusterManagerCommandDef { } clusterManagerCommandDef; clusterManagerCommandDef clusterManagerCommands[] = { - {"create", clusterManagerCommandCreate, -2, "host1:port1 ... hostN:portN", + {"create", clusterManagerCommandCreate, -1, "host1:port1 ... 
hostN:portN", "replicas "}, {"check", clusterManagerCommandCheck, -1, " or - separated by either colon or space", "search-multiple-owners"}, @@ -4043,7 +4168,7 @@ static int clusterManagerExecTransaction(clusterManagerNode *node, static int clusterManagerNodeConnect(clusterManagerNode *node) { if (node->context) redisFree(node->context); - node->context = redisConnect(node->ip, node->port); + node->context = redisConnectWrapper(node->ip, node->port, config.connect_timeout); if (!node->context->err && config.tls) { const char *err = NULL; if (cliSecureConnection(node->context, config.sslconfig, &err) == REDIS_ERR && err) { @@ -4573,7 +4698,7 @@ static void clusterManagerShowNodes(void) { static void clusterManagerShowClusterInfo(void) { int masters = 0; - int keys = 0; + long long keys = 0; listIter li; listNode *ln; listRewind(cluster_manager.nodes, &li); @@ -4582,7 +4707,7 @@ static void clusterManagerShowClusterInfo(void) { if (!(node->flags & CLUSTER_MANAGER_FLAG_SLAVE)) { if (!node->name) continue; int replicas = 0; - int dbsize = -1; + long long dbsize = -1; char name[9]; memcpy(name, node->name, 8); name[8] = '\0'; @@ -4608,14 +4733,14 @@ static void clusterManagerShowClusterInfo(void) { return; }; if (reply != NULL) freeReplyObject(reply); - printf("%s:%d (%s...) -> %d keys | %d slots | %d slaves.\n", + printf("%s:%d (%s...) -> %lld keys | %d slots | %d slaves.\n", node->ip, node->port, name, dbsize, node->slots_count, replicas); masters++; keys += dbsize; } } - clusterManagerLogOk("[OK] %d keys in %d masters.\n", keys, masters); + clusterManagerLogOk("[OK] %lld keys in %d masters.\n", keys, masters); float keys_per_slot = keys / (float) CLUSTER_MANAGER_SLOTS; printf("%.2f keys per slot on average.\n", keys_per_slot); } @@ -7055,7 +7180,10 @@ static int clusterManagerCommandCreate(int argc, char **argv) { first = node; /* Although hiredis supports connecting to a hostname, CLUSTER * MEET requires an IP address, so we do a DNS lookup here. 
*/ - if (anetResolve(NULL, first->ip, first_ip, sizeof(first_ip), ANET_NONE) + int anet_flags = ANET_NONE; + if (config.prefer_ipv4) anet_flags |= ANET_PREFER_IPV4; + if (config.prefer_ipv6) anet_flags |= ANET_PREFER_IPV6; + if (anetResolve(NULL, first->ip, first_ip, sizeof(first_ip), anet_flags) == ANET_ERR) { fprintf(stderr, "Invalid IP address or hostname specified: %s\n", first->ip); @@ -7250,7 +7378,10 @@ static int clusterManagerCommandAddNode(int argc, char **argv) { "join the cluster.\n", ip, port); /* CLUSTER MEET requires an IP address, so we do a DNS lookup here. */ char first_ip[NET_IP_STR_LEN]; - if (anetResolve(NULL, first->ip, first_ip, sizeof(first_ip), ANET_NONE) == ANET_ERR) { + int anet_flags = ANET_NONE; + if (config.prefer_ipv4) anet_flags |= ANET_PREFER_IPV4; + if (config.prefer_ipv6) anet_flags |= ANET_PREFER_IPV6; + if (anetResolve(NULL, first->ip, first_ip, sizeof(first_ip), anet_flags) == ANET_ERR) { fprintf(stderr, "Invalid IP address or hostname specified: %s\n", first->ip); success = 0; goto cleanup; @@ -7862,7 +7993,7 @@ static int clusterManagerCommandImport(int argc, char **argv) { char *reply_err = NULL; redisReply *src_reply = NULL; // Connect to the source node. 
- redisContext *src_ctx = redisConnect(src_ip, src_port); + redisContext *src_ctx = redisConnectWrapper(src_ip, src_port, config.connect_timeout); if (src_ctx->err) { success = 0; fprintf(stderr,"Could not connect to Redis at %s:%d: %s.\n", src_ip, @@ -8834,7 +8965,8 @@ static redisReply *sendScan(unsigned long long *it) { reply = redisCommand(context, "SCAN %llu MATCH %b COUNT %d", *it, config.pattern, sdslen(config.pattern), config.count); else - reply = redisCommand(context,"SCAN %llu",*it); + reply = redisCommand(context, "SCAN %llu COUNT %d", + *it, config.count); /* Handle any error conditions */ if(reply == NULL) { @@ -8885,6 +9017,28 @@ static int getDbSize(void) { return size; } +static int getDatabases(void) { + redisReply *reply; + int dbnum; + + reply = redisCommand(context, "CONFIG GET databases"); + + if (reply == NULL) { + fprintf(stderr, "\nI/O error\n"); + exit(1); + } else if (reply->type == REDIS_REPLY_ERROR) { + dbnum = 16; + fprintf(stderr, "CONFIG GET databases fails: %s, use default value 16 instead\n", reply->str); + } else { + assert(reply->type == (config.current_resp3 ? 
REDIS_REPLY_MAP : REDIS_REPLY_ARRAY)); + assert(reply->elements == 2); + dbnum = atoi(reply->element[1]->str); + } + + freeReplyObject(reply); + return dbnum; +} + typedef struct { char *name; char *sizecmd; @@ -8973,7 +9127,7 @@ static void getKeyTypes(dict *types_dict, redisReply *keys, typeinfo **types) { static void getKeySizes(redisReply *keys, typeinfo **types, unsigned long long *sizes, int memkeys, - unsigned memkeys_samples) + long long memkeys_samples) { redisReply *reply; unsigned int i; @@ -8988,7 +9142,7 @@ static void getKeySizes(redisReply *keys, typeinfo **types, const char* argv[] = {types[i]->sizecmd, keys->element[i]->str}; size_t lens[] = {strlen(types[i]->sizecmd), keys->element[i]->len}; redisAppendCommandArgv(context, 2, argv, lens); - } else if (memkeys_samples==0) { + } else if (memkeys_samples == -1) { const char* argv[] = {"MEMORY", "USAGE", keys->element[i]->str}; size_t lens[] = {6, 5, keys->element[i]->len}; redisAppendCommandArgv(context, 3, argv, lens); @@ -9035,7 +9189,27 @@ static void longStatLoopModeStop(int s) { force_cancel_loop = 1; } -static void findBigKeys(int memkeys, unsigned memkeys_samples) { +/* In cluster mode we may need to send the READONLY command. + Ignore the error in case the server isn't using cluster mode. 
*/ +static void sendReadOnly(void) { + redisReply *read_reply; + read_reply = redisCommand(context, "READONLY"); + if (read_reply == NULL){ + fprintf(stderr, "\nI/O error\n"); + exit(1); + } else if (read_reply->type == REDIS_REPLY_ERROR && + strcmp(read_reply->str, "ERR This instance has cluster support disabled") != 0 && + strncmp(read_reply->str, "ERR unknown command", 19) != 0) { + fprintf(stderr, "Error: %s\n", read_reply->str); + exit(1); + } + freeReplyObject(read_reply); +} + +static int displayKeyStatsProgressbar(unsigned long long sampled, + unsigned long long total_keys); + +static void findBigKeys(int memkeys, long long memkeys_samples) { unsigned long long sampled = 0, total_keys, totlen=0, *sizes=NULL, it=0, scan_loops = 0; redisReply *reply, *keys; unsigned int arrsize=0, i; @@ -9043,6 +9217,7 @@ static void findBigKeys(int memkeys, unsigned memkeys_samples) { dictEntry *de; typeinfo **types = NULL; double pct; + long long refresh_time = mstime(); dict *types_dict = dictCreate(&typeinfoDictType); typeinfo_add(types_dict, "string", &type_string); @@ -9060,6 +9235,9 @@ static void findBigKeys(int memkeys, unsigned memkeys_samples) { printf("\n# Scanning the entire keyspace to find biggest keys as well as\n"); printf("# average sizes per key type. You can use -i 0.1 to sleep 0.1 sec\n"); printf("# per 100 SCAN commands (not usually needed).\n\n"); + + /* Use readonly in cluster */ + sendReadOnly(); /* SCAN loop */ do { @@ -9110,19 +9288,44 @@ static void findBigKeys(int memkeys, unsigned memkeys_samples) { exit(1); } - printf( - "[%05.2f%%] Biggest %-6s found so far '%s' with %llu %s\n", - pct, type->name, type->biggest_key, sizes[i], - !memkeys? type->sizeunit: "bytes"); + /* We only show the original progress output when writing to a file */ + if (!IS_TTY_OR_FAKETTY()) { + printf("[%05.2f%%] Biggest %-6s found so far %s with %llu %s\n", + pct, type->name, type->biggest_key, sizes[i], + !memkeys? 
type->sizeunit: "bytes"); + } /* Keep track of the biggest size for this type */ type->biggest = sizes[i]; } - /* Update overall progress */ - if(sampled % 1000000 == 0) { + /* Update overall progress + * We only show the original progress output when writing to a file */ + if (sampled % 1000000 == 0 && !IS_TTY_OR_FAKETTY()) { printf("[%05.2f%%] Sampled %llu keys so far\n", pct, sampled); } + + /* Show the progress bar in TTY */ + if (mstime() > refresh_time + REFRESH_INTERVAL && IS_TTY_OR_FAKETTY()) { + int line_count = 0; + refresh_time = mstime(); + + line_count = displayKeyStatsProgressbar(sampled, total_keys); + line_count += cleanPrintfln(""); + + di = dictGetIterator(types_dict); + while ((de = dictNext(di))) { + typeinfo *current_type = dictGetVal(de); + if (current_type->biggest > 0) { + line_count += cleanPrintfln("Biggest %-9s found so far %s with %llu %s", + current_type->name, current_type->biggest_key, current_type->biggest, + !memkeys? current_type->sizeunit: "bytes"); + } + } + dictReleaseIterator(di); + + printf("\033[%dA\r", line_count); + } } /* Sleep if we've been directed to do so */ @@ -9133,13 +9336,31 @@ static void findBigKeys(int memkeys, unsigned memkeys_samples) { freeReplyObject(reply); } while(force_cancel_loop == 0 && it != 0); + /* Final progress bar if TTY */ + if (IS_TTY_OR_FAKETTY()) { + displayKeyStatsProgressbar(sampled, total_keys); + + /* Clean the types info shown during the progress bar */ + int line_count = 0; + di = dictGetIterator(types_dict); + while ((de = dictNext(di))) + line_count += cleanPrintfln(""); + dictReleaseIterator(di); + printf("\033[%dA\r", line_count); + } + if(types) zfree(types); if(sizes) zfree(sizes); /* We're done */ printf("\n-------- summary -------\n\n"); - if (force_cancel_loop) printf("[%05.2f%%] ", pct); - printf("Sampled %llu keys in the keyspace!\n", sampled); + + /* Show percentage and sampled output when writing to a file */ + if (!IS_TTY_OR_FAKETTY()) { + if (force_cancel_loop) 
printf("[%05.2f%%] ", pct); + printf("Sampled %llu keys in the keyspace!\n", sampled); + } + printf("Total key length in bytes is %llu (avg len %.2f)\n\n", totlen, totlen ? (double)totlen/sampled : 0); @@ -9148,7 +9369,7 @@ static void findBigKeys(int memkeys, unsigned memkeys_samples) { while ((de = dictNext(di))) { typeinfo *type = dictGetVal(de); if(type->biggest_key) { - printf("Biggest %6s found '%s' has %llu %s\n", type->name, type->biggest_key, + printf("Biggest %6s found %s has %llu %s\n", type->name, type->biggest_key, type->biggest, !memkeys? type->sizeunit: "bytes"); } } @@ -9214,8 +9435,10 @@ static void findHotKeys(void) { unsigned long long counters[HOTKEYS_SAMPLE] = {0}; sds hotkeys[HOTKEYS_SAMPLE] = {NULL}; unsigned long long sampled = 0, total_keys, *freqs = NULL, it = 0, scan_loops = 0; - unsigned int arrsize = 0, i, k; + unsigned int arrsize = 0, i; + int k; double pct; + long long refresh_time = mstime(); signal(SIGINT, longStatLoopModeStop); /* Total keys pre scanning */ @@ -9226,6 +9449,9 @@ static void findHotKeys(void) { printf("# average sizes per key type. You can use -i 0.1 to sleep 0.1 sec\n"); printf("# per 100 SCAN commands (not usually needed).\n\n"); + /* Use readonly in cluster */ + sendReadOnly(); + /* SCAN loop */ do { /* Calculate approximate percentage completion */ @@ -9253,8 +9479,10 @@ static void findHotKeys(void) { /* Now update our stats */ for(i=0;ielements;i++) { sampled++; - /* Update overall progress */ - if(sampled % 1000000 == 0) { + + /* Update overall progress. 
+ * Only show the original progress output when writing to a file */ + if (sampled % 1000000 == 0 && !IS_TTY_OR_FAKETTY()) { printf("[%05.2f%%] Sampled %llu keys so far\n", pct, sampled); } @@ -9272,9 +9500,30 @@ static void findHotKeys(void) { } counters[k] = freqs[i]; hotkeys[k] = sdscatrepr(sdsempty(), keys->element[i]->str, keys->element[i]->len); - printf( - "[%05.2f%%] Hot key '%s' found so far with counter %llu\n", - pct, hotkeys[k], freqs[i]); + + /* Only show the original progress output when writing to a file */ + if (!IS_TTY_OR_FAKETTY()) { + printf("[%05.2f%%] Hot key %s found so far with counter %llu\n", + pct, hotkeys[k], freqs[i]); + } + } + + /* Show the progress bar in TTY */ + if (mstime() > refresh_time + REFRESH_INTERVAL && IS_TTY_OR_FAKETTY()) { + int line_count = 0; + refresh_time = mstime(); + + line_count = displayKeyStatsProgressbar(sampled, total_keys); + line_count += cleanPrintfln(""); + + for (k = HOTKEYS_SAMPLE - 1; k >= 0; k--) { + if (counters[k] > 0) { + line_count += cleanPrintfln("hot key found with counter: %llu\tkeyname: %s", + counters[k], hotkeys[k]); + } + } + + printf("\033[%dA\r", line_count); } /* Sleep if we've been directed to do so */ @@ -9285,16 +9534,30 @@ static void findHotKeys(void) { freeReplyObject(reply); } while(force_cancel_loop ==0 && it != 0); + /* Final progress bar in TTY */ + if (IS_TTY_OR_FAKETTY()) { + displayKeyStatsProgressbar(sampled, total_keys); + + /* clean the types info shown during the progress bar */ + int line_count = 0; + for (k = 0; k <= HOTKEYS_SAMPLE; k++) + line_count += cleanPrintfln(""); + printf("\033[%dA\r", line_count); + } + if (freqs) zfree(freqs); /* We're done */ printf("\n-------- summary -------\n\n"); - if(force_cancel_loop)printf("[%05.2f%%] ",pct); - printf("Sampled %llu keys in the keyspace!\n", sampled); - for (i=1; i<= HOTKEYS_SAMPLE; i++) { - k = HOTKEYS_SAMPLE - i; - if(counters[k]>0) { + /* Show the original output when writing to a file */ + if (!IS_TTY_OR_FAKETTY()) 
{ + if(force_cancel_loop) printf("[%05.2f%%] ",pct); + printf("Sampled %llu keys in the keyspace!\n", sampled); + } + + for (k = HOTKEYS_SAMPLE - 1; k >= 0; k--) { + if (counters[k] > 0) { printf("hot key found with counter: %llu\tkeyname: %s\n", counters[k], hotkeys[k]); sdsfree(hotkeys[k]); } @@ -9339,9 +9602,11 @@ static long getLongInfoField(char *info, char *field) { } /* Convert number of bytes into a human readable string of the form: - * 100B, 2G, 100M, 4K, and so forth. */ -void bytesToHuman(char *s, size_t size, long long n) { + * 1003B, 4.03K, 100.00M, 2.32G, 3.01T + * Returns the parameter `s` containing the converted number. */ +char *bytesToHuman(char *s, size_t size, long long n) { double d; + char *r = s; if (n < 0) { *s = '-'; @@ -9351,7 +9616,6 @@ void bytesToHuman(char *s, size_t size, long long n) { if (n < 1024) { /* Bytes */ snprintf(s,size,"%lldB",n); - return; } else if (n < (1024*1024)) { d = (double)n/(1024); snprintf(s,size,"%.2fK",d); @@ -9361,12 +9625,18 @@ void bytesToHuman(char *s, size_t size, long long n) { } else if (n < (1024LL*1024*1024*1024)) { d = (double)n/(1024LL*1024*1024); snprintf(s,size,"%.2fG",d); + } else if (n < (1024LL*1024*1024*1024*1024)) { + d = (double)n/(1024LL*1024*1024*1024); + snprintf(s,size,"%.2fT",d); } + + return r; } static void statMode(void) { redisReply *reply; long aux, requests = 0; + int dbnum = getDatabases(); int i = 0; while(1) { @@ -9390,7 +9660,7 @@ static void statMode(void) { /* Keys */ aux = 0; - for (j = 0; j < 20; j++) { + for (j = 0; j < dbnum; j++) { long k; snprintf(buf,sizeof(buf),"db%d:keys",j); @@ -9742,6 +10012,559 @@ void testHintSuite(char *filename) { exit(fail); } +/*------------------------------------------------------------------------------ + * Keystats + *--------------------------------------------------------------------------- */ + +/* Key name length distribution. */ + +typedef struct size_dist_entry { + unsigned long long size; /* Key name size in bytes. 
*/ + unsigned long long count; /* Number of key names that are less or equal to the size. */ +} size_dist_entry; + +typedef struct size_dist { + unsigned long long total_count; /* Total number of key names in the distribution. */ + unsigned long long total_size; /* Sum of all the key name sizes in bytes. */ + unsigned long long max_size; /* Highest key name size in bytes. */ + size_dist_entry *size_dist; /* Array of sizes and key names count per size. */ +} size_dist; + +/* distribution is an array initialized with last element {0, 0} + * for instance: size_dist_entry distribution[] = { {32, 0}, {256, 0}, {0, 0} }; */ +static void sizeDistInit(size_dist *dist, size_dist_entry *distribution) { + dist->max_size = 0; + dist->total_count = 0; + dist->total_size = 0; + dist->size_dist = distribution; +} + +static void addSizeDist(size_dist *dist, unsigned long long size) { + dist->total_count++; + dist->total_size += size; + + if (size > dist->max_size) + dist->max_size = size; + + int j; + for (j=0; dist->size_dist[j].size && size > dist->size_dist[j].size; j++); + dist->size_dist[j].count++; +} + +static int displayKeyStatsLengthDist(size_dist *dist) { + int line_count = 0; + unsigned long long total_keys = 0, size; + char buf[2][256]; + + line_count += cleanPrintfln("Key name length Percentile Total keys"); + line_count += cleanPrintfln("--------------- ---------- -----------"); + + for (int i=0; dist->size_dist[i].size; i++) { + if (dist->size_dist[i].count) { + if (dist->max_size < dist->size_dist[i].size) { + size = dist->max_size; + } else { + size = dist->size_dist[i].size; + } + total_keys += dist->size_dist[i].count; + line_count += cleanPrintfln("%15s %9.4f%% %11llu", + bytesToHuman(buf[1], sizeof(buf[1]), size), + (double)100 * total_keys / dist->total_count, + total_keys); + } + } + + if (total_keys < dist->total_count) { + line_count += cleanPrintfln(" inf %9.4f%% %11llu", 100.0, dist->total_count); + } + + line_count += cleanPrintfln("Total key length is 
%s (%s avg)", + bytesToHuman(buf[0], sizeof(buf[0]), dist->total_size), + dist->total_count ? bytesToHuman(buf[1], sizeof(buf[1]), dist->total_size/dist->total_count) : "0"); + + return line_count; +} + +#define PROGRESSBAR_WIDTH 60 +static int displayKeyStatsProgressbar(unsigned long long sampled, + unsigned long long total_keys) +{ + int line_count = 0; + char progressbar[512]; + char buf[2][128]; + + /* We can go over 100% if keys are added in the middle of the scans. + * Cap at 100% or the progressbar memset will overflow. */ + double completion_pct = total_keys ? sampled < total_keys ? (double) sampled/total_keys : 1 : 0; + + /* If we are not redirecting to a file, build the progress bar */ + if (IS_TTY_OR_FAKETTY()) { + int completed_width = (int)round(PROGRESSBAR_WIDTH * completion_pct); + memset(buf[0], '|', completed_width); + buf[0][completed_width]= '\0'; + + int uncompleted_width = PROGRESSBAR_WIDTH - completed_width; + memset(buf[1], '-', uncompleted_width); + buf[1][uncompleted_width]= '\0'; + + char red[] = "\033[31m"; + char green[] = "\033[32m"; + char default_color[] = "\033[39m"; + snprintf(progressbar, sizeof(progressbar), "%s%s%s%s%s", + green, buf[0], red, buf[1], default_color); + } else { + snprintf(progressbar, sizeof(progressbar), "%s", "keys scanned"); + } + + line_count += cleanPrintfln("%6.2f%% %s", completion_pct * 100, progressbar); + line_count += cleanPrintfln("Keys sampled: %llu", sampled); + + return line_count; +} + +static int displayKeyStatsSizeType(dict *memkeys_types_dict) { + dictIterator *di; + dictEntry *de; + int line_count = 0; + char buf[256]; + + line_count += cleanPrintfln("--- Top size per type ---"); + di = dictGetIterator(memkeys_types_dict); + while ((de = dictNext(di))) { + typeinfo *type = dictGetVal(de); + if (type->biggest_key) { + line_count += cleanPrintfln("%-10s %s is %s", + type->name, type->biggest_key, + bytesToHuman(buf, sizeof(buf),type->biggest)); + } + } + dictReleaseIterator(di); + + return 
line_count; +} + +static int displayKeyStatsLengthType(dict *bigkeys_types_dict) { + dictIterator *di; + dictEntry *de; + int line_count = 0; + char buf[256]; + + line_count += cleanPrintfln("--- Top length and cardinality per type ---"); + di = dictGetIterator(bigkeys_types_dict); + while ((de = dictNext(di))) { + typeinfo *type = dictGetVal(de); + if (type->biggest_key) { + if (!strcmp(type->sizeunit, "bytes")) { + bytesToHuman(buf, sizeof(buf), type->biggest); + } else { + snprintf(buf, sizeof(buf), "%llu %s", type->biggest, type->sizeunit); + } + line_count += cleanPrintfln("%-10s %s has %s", type->name, type->biggest_key, buf); + } + } + dictReleaseIterator(di); + + return line_count; +} + +static int displayKeyStatsSizeDist(struct hdr_histogram *keysize_histogram) { + int line_count = 0; + double percentile; + char size[32], mean[32], stddev[32]; + struct hdr_iter iter; + int64_t last_displayed_cumulative_count = 0; + + hdr_iter_percentile_init(&iter, keysize_histogram, 1); + + line_count += cleanPrintfln("Key size Percentile Total keys"); + line_count += cleanPrintfln("-------- ---------- -----------"); + + while (hdr_iter_next(&iter)) { + /* Skip repeat in hdr_histogram cumulative_count, and set the last line + * to 100% when total_count is reached. 
For instance: + * 140.68K 99.9969% 50013 + * 140.68K 99.9977% 50013 + * 2.04G 99.9985% 50014 + * 2.04G 100.0000% 50014 + * Will display: + * 140.68K 99.9969% 50013 + * 2.04G 100.0000% 50014 */ + + if (iter.cumulative_count != last_displayed_cumulative_count) { + if (iter.cumulative_count == iter.h->total_count) { + percentile = 100; + } else { + percentile = iter.specifics.percentiles.percentile; + } + + line_count += cleanPrintfln("%8s %9.4f%% %11lld", + bytesToHuman(size, sizeof(size), iter.highest_equivalent_value), + percentile, + iter.cumulative_count); + + last_displayed_cumulative_count = iter.cumulative_count; + } + } + + bytesToHuman(mean, sizeof(mean),hdr_mean(keysize_histogram)); + bytesToHuman(stddev, sizeof(stddev),hdr_stddev(keysize_histogram)); + line_count += cleanPrintfln("Note: 0.01%% size precision, Mean: %s, StdDeviation: %s", mean, stddev); + + return line_count; +} + +static int displayKeyStatsType(unsigned long long sampled, + dict *memkeys_types_dict, + dict *bigkeys_types_dict) +{ + dictIterator *di; + dictEntry *de; + int line_count = 0; + char total_size[64], size_avg[64], total_length[64], length_avg[64]; + + line_count += cleanPrintfln("Type Total keys Keys %% Tot size Avg size Total length/card Avg ln/card"); + line_count += cleanPrintfln("--------- ------------ ------- -------- -------- ------------------ -----------"); + + di = dictGetIterator(memkeys_types_dict); + while ((de = dictNext(di))) { + typeinfo *memkey_type = dictGetVal(de); + if (memkey_type->count) { + /* Key count, percentage, memkeys info */ + bytesToHuman(total_size, sizeof(total_size), memkey_type->totalsize); + bytesToHuman(size_avg, sizeof(size_avg), memkey_type->totalsize/memkey_type->count); + + strncpy(total_length, " - ", sizeof(total_length)); + strncpy(length_avg, " - ", sizeof(length_avg)); + + /* bigkeys info */ + dictEntry *bk_de = dictFind(bigkeys_types_dict, memkey_type->name); + if (bk_de) { /* If we have it in memkeys it should be in bigkeys */ + 
typeinfo *bigkey_type = dictGetVal(bk_de); + if (bigkey_type->sizecmd && bigkey_type->count) { + double avg = (double)bigkey_type->totalsize/bigkey_type->count; + if (!strcmp(bigkey_type->sizeunit, "bytes")) { + bytesToHuman(total_length, sizeof(total_length), bigkey_type->totalsize); + bytesToHuman(length_avg, sizeof(length_avg), (long long)round(avg)); /* better than truncating */ + } else { + snprintf(total_length, sizeof(total_length), "%llu %s", bigkey_type->totalsize, bigkey_type->sizeunit); + snprintf(length_avg, sizeof(length_avg), "%.2f", avg); + } + } + } + /* Print the line for the given Redis type */ + line_count += cleanPrintfln("%-10s %11llu %6.2f%% %8s %8s %18s %11s", + memkey_type->name, memkey_type->count, + sampled ? 100 * (double)memkey_type->count/sampled : 0, + total_size, size_avg, total_length, length_avg); + } + } + dictReleaseIterator(di); + + return line_count; +} + +typedef struct key_info { + unsigned long long size; + char type_name[10]; /* Key type name seems to be 9 char max + \0 */ + sds key_name; +} key_info; + +static int displayKeyStatsTopSizes(list *top_key_sizes, unsigned long top_sizes_limit) { + int line_count = 0, i = 0; + + line_count += cleanPrintfln("--- Top %llu key sizes ---", top_sizes_limit); + char buffer[32]; + listIter *iter = listGetIterator(top_key_sizes, AL_START_HEAD); + listNode *node; + while ((node = listNext(iter)) != NULL) { + key_info *key = (key_info*) listNodeValue(node); + line_count += cleanPrintfln("%3d %8s %-10s %s", ++i, bytesToHuman(buffer, sizeof(buffer), key->size), + key->type_name, key->key_name); + } + listReleaseIterator(iter); + + return line_count; +} + +static key_info *createKeySizeInfo(char *key_name, size_t key_name_len, char *key_type, unsigned long long size) { + key_info *key = zmalloc(sizeof(key_info)); + key->size = size; + snprintf(key->type_name, sizeof(key->type_name), "%s", key_type); + key->key_name = sdscatrepr(sdsempty(), key_name, key_name_len); + if (!key->key_name) { + 
fprintf(stderr, "Failed to allocate memory for key name.\n"); + exit(1); + } + return key; +} + +/* Insert key info in topkeys sorted by size (from high to low size). + * Keep a maximum of config.top_sizes_limit items in topkeys list. + * key_name and type_name are copied. + * Return: 0 size was not added (too small), 1 size was inserted. */ +static int updateTopSizes(char *key_name, size_t key_name_len, unsigned long long key_size, + char *type_name, list *topkeys, unsigned long top_sizes_limit) +{ + listNode *node; + listIter *iter; + key_info *new_node; + + /* Check if we do not need to add to the list */ + if (top_sizes_limit != 0 && + topkeys->len == top_sizes_limit && + key_size <= ((key_info*)topkeys->tail->value)->size){ + return 0; + } + + /* Find where to insert the new key size */ + iter = listGetIterator(topkeys, AL_START_HEAD); + do { + node = listNext(iter); + } while (node != NULL && key_size <= ((key_info*)node->value)->size); + listReleaseIterator(iter); + + new_node = createKeySizeInfo(key_name, key_name_len, type_name, key_size); + if (node) { + /* Insert before the node */ + listInsertNode(topkeys, node, new_node, 0); + } else { + /* Insert as the last node */ + listAddNodeTail(topkeys, new_node); + } + + /* Trim to stay within the limit */ + if (topkeys->len == top_sizes_limit + 1) { + sdsfree(((key_info*)topkeys->tail->value)->key_name); + listDelNode(topkeys, topkeys->tail); /* list->free is set */ + } + + return 1; +} + +static void displayKeyStats(unsigned long long sampled, unsigned long long total_keys, + unsigned long long total_size, dict *memkeys_types_dict, + dict *bigkeys_types_dict, list *top_key_sizes, + unsigned long top_sizes_limit, int move_cursor_up) +{ + int line_count = 0; + char buf[256]; + + line_count += displayKeyStatsProgressbar(sampled, total_keys); + line_count += cleanPrintfln("Keys size: %s", bytesToHuman(buf, sizeof(buf), total_size)); + line_count += cleanPrintfln(""); + line_count += 
displayKeyStatsTopSizes(top_key_sizes, top_sizes_limit); + line_count += cleanPrintfln(""); + line_count += displayKeyStatsSizeType(memkeys_types_dict); + line_count += cleanPrintfln(""); + line_count += displayKeyStatsLengthType(bigkeys_types_dict); + + /* If we need to refresh the stats */ + if (move_cursor_up) { + printf("\033[%dA\r", line_count); + } + + fflush(stdout); +} + +static void updateKeyType(redisReply *element, unsigned long long size, typeinfo *type) { + type->totalsize += size; + type->count++; + + if (size > type->biggest) { + /* Keep track of the biggest key name for this type */ + if (type->biggest_key) + sdsfree(type->biggest_key); + type->biggest_key = sdsnewlen(element->str, element->len); + if (!type->biggest_key) { + fprintf(stderr, "Failed to allocate memory for key!\n"); + exit(1); + } + /* Keep track of the biggest size for this type */ + type->biggest = size; + } +} + +static void keyStats(long long memkeys_samples, unsigned long long cursor, unsigned long top_sizes_limit) { + unsigned long long sampled = 0, total_keys, total_size = 0, it = 0, scan_loops = 0; + unsigned long long *memkeys_sizes = NULL, *bigkeys_sizes = NULL; + redisReply *reply, *keys; + unsigned int array_size = 0, i; + typeinfo **memkeys_types = NULL, **bigkeys_types = NULL; + list *top_sizes; + long long refresh_time = mstime(); + + if (cursor != 0) { + it = cursor; + } + + if ((top_sizes = listCreate()) == NULL) { + fprintf(stderr, "top_sizes list creation failed.\n"); + exit(1); + } + top_sizes->free = zfree; + + dict *memkeys_types_dict = dictCreate(&typeinfoDictType); + typeinfo_add(memkeys_types_dict, "string", &type_string); + typeinfo_add(memkeys_types_dict, "list", &type_list); + typeinfo_add(memkeys_types_dict, "set", &type_set); + typeinfo_add(memkeys_types_dict, "hash", &type_hash); + typeinfo_add(memkeys_types_dict, "zset", &type_zset); + typeinfo_add(memkeys_types_dict, "stream", &type_stream); + + /* We could use only one typeinfo dictionary if we add new fields to save + * both memkey and bigkey info. 
Not sure it would make sense in findBigKeys(). */ + dict *bigkeys_types_dict = dictCreate(&typeinfoDictType); + typeinfo_add(bigkeys_types_dict, "string", &type_string); + typeinfo_add(bigkeys_types_dict, "list", &type_list); + typeinfo_add(bigkeys_types_dict, "set", &type_set); + typeinfo_add(bigkeys_types_dict, "hash", &type_hash); + typeinfo_add(bigkeys_types_dict, "zset", &type_zset); + typeinfo_add(bigkeys_types_dict, "stream", &type_stream); + + size_dist key_length_dist; + size_dist_entry distribution[] = { + {1<<5, 0}, /* 32 B (sds) */ + {1<<8, 0}, /* 256 B (sds) */ + {1<<16, 0}, /* 64 KB (sds and Redis Enterprise key name max length) */ + {1024*1024, 0}, /* 1 MB */ + {16*1024*1024, 0}, /* 16 MB */ + {128*1024*1024, 0}, /* 128 MB */ + {512*1024*1024, 0}, /* 512 MB (max String size) */ + {0, 0}, /* Sizes above the last entry */ + }; + sizeDistInit(&key_length_dist, distribution); + + struct hdr_histogram *keysize_histogram; + /* Record max of 1TB for a key size should cover all keys. 
+ * significant_figures == 4 (0.01% precision on key size) */ + if (hdr_init(1, 1ULL*1024*1024*1024*1024, 4, &keysize_histogram)) { + fprintf(stderr, "Keystats hdr init error\n"); + exit(1); + } + + signal(SIGINT, longStatLoopModeStop); + + /* Total keys pre scanning */ + total_keys = getDbSize(); + + /* Status message */ + printf("\n# Scanning the entire keyspace to find the biggest keys and distribution information.\n"); + printf("# Use -i 0.1 to sleep 0.1 sec per 100 SCAN commands (not usually needed).\n"); + printf("# Use --cursor to start the scan at the cursor (usually after a Ctrl-C).\n"); + printf("# Use --top to display top key sizes (default is 10).\n"); + printf("# Ctrl-C to stop the scan.\n\n"); + + /* Use readonly in cluster */ + sendReadOnly(); + + /* SCAN loop */ + do { + /* Grab some keys and point to the keys array */ + reply = sendScan(&it); + scan_loops++; + keys = reply->element[1]; + + /* Reallocate our type and size array if we need to */ + if (keys->elements > array_size) { + memkeys_types = zrealloc(memkeys_types, sizeof(typeinfo*)*keys->elements); + memkeys_sizes = zrealloc(memkeys_sizes, sizeof(unsigned long long)*keys->elements); + + bigkeys_types = zrealloc(bigkeys_types, sizeof(typeinfo*)*keys->elements); + bigkeys_sizes = zrealloc(bigkeys_sizes, sizeof(unsigned long long)*keys->elements); + + if (!memkeys_types || !memkeys_sizes || !bigkeys_types || !bigkeys_sizes) { + fprintf(stderr, "Failed to allocate storage for keys!\n"); + exit(1); + } + + array_size = keys->elements; + } + + /* Retrieve types and sizes for memkeys */ + getKeyTypes(memkeys_types_dict, keys, memkeys_types); + getKeySizes(keys, memkeys_types, memkeys_sizes, 1, memkeys_samples); + + /* Retrieve types and sizes for bigkeys */ + getKeyTypes(bigkeys_types_dict, keys, bigkeys_types); + getKeySizes(keys, bigkeys_types, bigkeys_sizes, 0, memkeys_samples); + + for (i=0; i<keys->elements; i++) { + /* Skip keys that disappeared between SCAN and TYPE */ + if (!memkeys_types[i] || 
!bigkeys_types[i]) { + continue; + } + + total_size += memkeys_sizes[i]; + sampled++; + + updateTopSizes(keys->element[i]->str, keys->element[i]->len, memkeys_sizes[i], + memkeys_types[i]->name, top_sizes, top_sizes_limit); + updateKeyType(keys->element[i], memkeys_sizes[i], memkeys_types[i]); + updateKeyType(keys->element[i], bigkeys_sizes[i], bigkeys_types[i]); + + /* Key Size distribution */ + if (hdr_record_value(keysize_histogram, memkeys_sizes[i]) == 0) { + fprintf(stderr, "Value %llu not added in the hdr histogram.\n", memkeys_sizes[i]); + } + + /* Key length distribution */ + addSizeDist(&key_length_dist, keys->element[i]->len); + } + + /* Refresh keystats info on regular basis */ + if (mstime() > refresh_time + REFRESH_INTERVAL && IS_TTY_OR_FAKETTY()) { + displayKeyStats(sampled, total_keys, total_size, memkeys_types_dict, bigkeys_types_dict, + top_sizes, top_sizes_limit, 1); + refresh_time = mstime(); + } + + /* Sleep if we've been directed to do so */ + if (config.interval && (scan_loops % 100) == 0) { + usleep(config.interval); + } + + freeReplyObject(reply); + } while(force_cancel_loop == 0 && it != 0); + + displayKeyStats(sampled, total_keys, total_size, memkeys_types_dict, bigkeys_types_dict, top_sizes, + top_sizes_limit, 0); + + /* Additional data at the end of the SCAN loop. + * Using cleanPrintfln in case we want to print during the SCAN loop. 
*/ + cleanPrintfln(""); + displayKeyStatsSizeDist(keysize_histogram); + cleanPrintfln(""); + displayKeyStatsLengthDist(&key_length_dist); + cleanPrintfln(""); + displayKeyStatsType(sampled, memkeys_types_dict, bigkeys_types_dict); + + if (it != 0) { + printf("\n"); + printf("Scan interrupted:\n"); + printf("Use 'redis-cli --keystats --cursor %llu' to restart from the last cursor.\n", it); + } + + if (memkeys_types) zfree(memkeys_types); + if (bigkeys_types) zfree(bigkeys_types); + if (memkeys_sizes) zfree(memkeys_sizes); + if (bigkeys_sizes) zfree(bigkeys_sizes); + dictRelease(memkeys_types_dict); + dictRelease(bigkeys_types_dict); + hdr_close(keysize_histogram); + + /* sdsfree before listRelease */ + listIter *iter = listGetIterator(top_sizes, AL_START_HEAD); + listNode *node; + while ((node = listNext(iter)) != NULL) { + key_info *key = (key_info*) listNodeValue(node); + sdsfree(key->key_name); + } + listReleaseIterator(iter); + listRelease(top_sizes); /* list->free is set */ + + exit(0); +} + /*------------------------------------------------------------------------------ * Program main() *--------------------------------------------------------------------------- */ @@ -9753,6 +10576,8 @@ int main(int argc, char **argv) { memset(&config.sslconfig, 0, sizeof(config.sslconfig)); config.conn_info.hostip = sdsnew("127.0.0.1"); config.conn_info.hostport = 6379; + config.connect_timeout.tv_sec = 0; + config.connect_timeout.tv_usec = 0; config.hostsocket = NULL; config.repeat = 1; config.interval = 0; @@ -9782,6 +10607,10 @@ int main(int argc, char **argv) { config.pipe_mode = 0; config.pipe_timeout = REDIS_CLI_DEFAULT_PIPE_TIMEOUT; config.bigkeys = 0; + config.memkeys = 0; + config.keystats = 0; + config.cursor = 0; + config.top_sizes_limit = 10; config.hotkeys = 0; config.stdin_lastarg = 0; config.stdin_tag_arg = 0; @@ -9801,6 +10630,8 @@ int main(int argc, char **argv) { config.no_auth_warning = 0; config.in_multi = 0; config.server_version = NULL; + 
config.prefer_ipv4 = 0; + config.prefer_ipv6 = 0; config.cluster_manager_command.name = NULL; config.cluster_manager_command.argc = 0; config.cluster_manager_command.argv = NULL; @@ -9918,6 +10749,12 @@ int main(int argc, char **argv) { findBigKeys(1, config.memkeys_samples); } + /* Find big and large keys */ + if (config.keystats) { + if (cliConnect(0) == REDIS_ERR) exit(1); + keyStats(config.memkeys_samples, config.cursor, config.top_sizes_limit); + } + /* Find hot keys */ if (config.hotkeys) { if (cliConnect(0) == REDIS_ERR) exit(1); diff --git a/src/redisassert.c b/src/redisassert.c index 9f7402e7482..fb16bd6a2fe 100644 --- a/src/redisassert.c +++ b/src/redisassert.c @@ -6,7 +6,7 @@ * * ---------------------------------------------------------------------------- * - * Copyright (c) 2021, Andy Pan and Redis Labs + * Copyright (c) 2021, Andy Pan and Redis Ltd. * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/src/redisassert.h b/src/redisassert.h index a3f95da091d..a85cac6df7d 100644 --- a/src/redisassert.h +++ b/src/redisassert.h @@ -7,32 +7,11 @@ * * ---------------------------------------------------------------------------- * - * Copyright (c) 2006-2012, Salvatore Sanfilippo + * Copyright (c) 2006-Present, Redis Ltd. * All rights reserved. * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. 
- * * Neither the name of Redis nor the names of its contributors may be used - * to endorse or promote products derived from this software without - * specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. + * Licensed under your choice of the Redis Source Available License 2.0 + * (RSALv2) or the Server Side Public License v1 (SSPLv1). 
*/ #ifndef __REDIS_ASSERT_H__ diff --git a/src/redismodule.h b/src/redismodule.h index 4378126e2b0..8b5d2beb65d 100644 --- a/src/redismodule.h +++ b/src/redismodule.h @@ -959,8 +959,10 @@ typedef struct RedisModuleTypeMethods { REDISMODULE_API void * (*RedisModule_Alloc)(size_t bytes) REDISMODULE_ATTR; REDISMODULE_API void * (*RedisModule_TryAlloc)(size_t bytes) REDISMODULE_ATTR; REDISMODULE_API void * (*RedisModule_Realloc)(void *ptr, size_t bytes) REDISMODULE_ATTR; +REDISMODULE_API void * (*RedisModule_TryRealloc)(void *ptr, size_t bytes) REDISMODULE_ATTR; REDISMODULE_API void (*RedisModule_Free)(void *ptr) REDISMODULE_ATTR; REDISMODULE_API void * (*RedisModule_Calloc)(size_t nmemb, size_t size) REDISMODULE_ATTR; +REDISMODULE_API void * (*RedisModule_TryCalloc)(size_t nmemb, size_t size) REDISMODULE_ATTR; REDISMODULE_API char * (*RedisModule_Strdup)(const char *str) REDISMODULE_ATTR; REDISMODULE_API int (*RedisModule_GetApi)(const char *, void *) REDISMODULE_ATTR; REDISMODULE_API int (*RedisModule_CreateCommand)(RedisModuleCtx *ctx, const char *name, RedisModuleCmdFunc cmdfunc, const char *strflags, int firstkey, int lastkey, int keystep) REDISMODULE_ATTR; @@ -968,6 +970,7 @@ REDISMODULE_API RedisModuleCommand *(*RedisModule_GetCommand)(RedisModuleCtx *ct REDISMODULE_API int (*RedisModule_CreateSubcommand)(RedisModuleCommand *parent, const char *name, RedisModuleCmdFunc cmdfunc, const char *strflags, int firstkey, int lastkey, int keystep) REDISMODULE_ATTR; REDISMODULE_API int (*RedisModule_SetCommandInfo)(RedisModuleCommand *command, const RedisModuleCommandInfo *info) REDISMODULE_ATTR; REDISMODULE_API int (*RedisModule_SetCommandACLCategories)(RedisModuleCommand *command, const char *ctgrsflags) REDISMODULE_ATTR; +REDISMODULE_API int (*RedisModule_AddACLCategory)(RedisModuleCtx *ctx, const char *name) REDISMODULE_ATTR; REDISMODULE_API void (*RedisModule_SetModuleAttribs)(RedisModuleCtx *ctx, const char *name, int ver, int apiver) REDISMODULE_ATTR; 
REDISMODULE_API int (*RedisModule_IsModuleNameBusy)(const char *name) REDISMODULE_ATTR; REDISMODULE_API int (*RedisModule_WrongArity)(RedisModuleCtx *ctx) REDISMODULE_ATTR; @@ -1250,6 +1253,8 @@ REDISMODULE_API void (*RedisModule_GetRandomBytes)(unsigned char *dst, size_t le REDISMODULE_API void (*RedisModule_GetRandomHexChars)(char *dst, size_t len) REDISMODULE_ATTR; REDISMODULE_API void (*RedisModule_SetDisconnectCallback)(RedisModuleBlockedClient *bc, RedisModuleDisconnectFunc callback) REDISMODULE_ATTR; REDISMODULE_API void (*RedisModule_SetClusterFlags)(RedisModuleCtx *ctx, uint64_t flags) REDISMODULE_ATTR; +REDISMODULE_API unsigned int (*RedisModule_ClusterKeySlot)(RedisModuleString *key) REDISMODULE_ATTR; +REDISMODULE_API const char *(*RedisModule_ClusterCanonicalKeyNameInSlot)(unsigned int slot) REDISMODULE_ATTR; REDISMODULE_API int (*RedisModule_ExportSharedAPI)(RedisModuleCtx *ctx, const char *apiname, void *func) REDISMODULE_ATTR; REDISMODULE_API void * (*RedisModule_GetSharedAPI)(RedisModuleCtx *ctx, const char *apiname) REDISMODULE_ATTR; REDISMODULE_API RedisModuleCommandFilter * (*RedisModule_RegisterCommandFilter)(RedisModuleCtx *ctx, RedisModuleCommandFilterFunc cb, int flags) REDISMODULE_ATTR; @@ -1321,14 +1326,17 @@ static int RedisModule_Init(RedisModuleCtx *ctx, const char *name, int ver, int REDISMODULE_GET_API(Alloc); REDISMODULE_GET_API(TryAlloc); REDISMODULE_GET_API(Calloc); + REDISMODULE_GET_API(TryCalloc); REDISMODULE_GET_API(Free); REDISMODULE_GET_API(Realloc); + REDISMODULE_GET_API(TryRealloc); REDISMODULE_GET_API(Strdup); REDISMODULE_GET_API(CreateCommand); REDISMODULE_GET_API(GetCommand); REDISMODULE_GET_API(CreateSubcommand); REDISMODULE_GET_API(SetCommandInfo); REDISMODULE_GET_API(SetCommandACLCategories); + REDISMODULE_GET_API(AddACLCategory); REDISMODULE_GET_API(SetModuleAttribs); REDISMODULE_GET_API(IsModuleNameBusy); REDISMODULE_GET_API(WrongArity); @@ -1611,6 +1619,8 @@ static int RedisModule_Init(RedisModuleCtx *ctx, const char 
*name, int ver, int REDISMODULE_GET_API(GetRandomBytes); REDISMODULE_GET_API(GetRandomHexChars); REDISMODULE_GET_API(SetClusterFlags); + REDISMODULE_GET_API(ClusterKeySlot); + REDISMODULE_GET_API(ClusterCanonicalKeyNameInSlot); REDISMODULE_GET_API(ExportSharedAPI); REDISMODULE_GET_API(GetSharedAPI); REDISMODULE_GET_API(RegisterCommandFilter); diff --git a/src/release.c b/src/release.c index adc7e55ddb9..f6619bb1dd9 100644 --- a/src/release.c +++ b/src/release.c @@ -1,30 +1,9 @@ /* - * Copyright (c) 2009-2012, Salvatore Sanfilippo + * Copyright (c) 2009-Present, Redis Ltd. * All rights reserved. * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Redis nor the names of its contributors may be used - * to endorse or promote products derived from this software without - * specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. + * Licensed under your choice of the Redis Source Available License 2.0 + * (RSALv2) or the Server Side Public License v1 (SSPLv1). */ /* Every time the Redis Git SHA1 or Dirty status changes only this small diff --git a/src/replication.c b/src/replication.c index 97e01b64df0..a3d4eb15cbe 100644 --- a/src/replication.c +++ b/src/replication.c @@ -1,31 +1,10 @@ /* Asynchronous replication implementation. * - * Copyright (c) 2009-2012, Salvatore Sanfilippo + * Copyright (c) 2009-Present, Redis Ltd. * All rights reserved. * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Redis nor the names of its contributors may be used - * to endorse or promote products derived from this software without - * specific prior written permission. 
- * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. + * Licensed under your choice of the Redis Source Available License 2.0 + * (RSALv2) or the Server Side Public License v1 (SSPLv1). */ @@ -210,6 +189,9 @@ int canFeedReplicaReplBuffer(client *replica) { /* Don't feed replicas that are still waiting for BGSAVE to start. */ if (replica->replstate == SLAVE_STATE_WAIT_BGSAVE_START) return 0; + /* Don't feed replicas that are going to be closed ASAP. 
*/ + if (replica->flags & CLIENT_CLOSE_ASAP) return 0; + return 1; } @@ -610,6 +592,7 @@ void replicationFeedMonitors(client *c, list *monitors, int dictid, robj **argv, while((ln = listNext(&li))) { client *monitor = ln->value; addReply(monitor,cmdobj); + updateClientMemUsageAndBucket(monitor); } decrRefCount(cmdobj); } @@ -877,7 +860,7 @@ int startBgsaveForReplication(int mincapa, int req) { retval = rdbSaveToSlavesSockets(req,rsiptr); else { /* Keep the page cache since it'll get used soon */ - retval = rdbSaveBackground(req,server.rdb_filename,rsiptr,RDBFLAGS_KEEP_CACHE); + retval = rdbSaveBackground(req, server.rdb_filename, rsiptr, RDBFLAGS_REPLICATION | RDBFLAGS_KEEP_CACHE); } } else { serverLog(LL_WARNING,"BGSAVE for replication: replication information not available, can't generate the RDB file right now. Try later."); @@ -950,7 +933,11 @@ void syncCommand(client *c) { } if (!strcasecmp(c->argv[1]->ptr,server.replid)) { - replicationUnsetMaster(); + if (server.cluster_enabled) { + clusterPromoteSelfToMaster(); + } else { + replicationUnsetMaster(); + } sds client = catClientInfoString(sdsempty(),c); serverLog(LL_NOTICE, "MASTER MODE enabled (failover request from '%s')",client); @@ -1254,7 +1241,7 @@ void replconfCommand(client *c) { int filter_count, i; sds *filters; if (!(filters = sdssplitargs(c->argv[j+1]->ptr, &filter_count))) { - addReplyErrorFormat(c, "Missing rdb-filter-only values"); + addReplyError(c, "Missing rdb-filter-only values"); return; } /* By default filter out all parts of the rdb */ @@ -1735,7 +1722,7 @@ int slaveIsInHandshakeState(void) { * not, since the byte is indivisible. * * The function is called in two contexts: while we flush the current - * data with emptyDb(), and while we load the new data received as an + * data with emptyData(), and while we load the new data received as an * RDB file from the master. 
*/ void replicationSendNewlineToMaster(void) { static time_t newline_sent; @@ -1746,7 +1733,7 @@ void replicationSendNewlineToMaster(void) { } } -/* Callback used by emptyDb() while flushing away old data to load +/* Callback used by emptyData() while flushing away old data to load * the new dataset received by the master and by discardTempDb() * after loading succeeded or failed. */ void replicationEmptyDbCallback(dict *d) { @@ -2235,6 +2222,10 @@ void readSyncBulkPayload(connection *conn) { "disabled"); bg_unlink(server.rdb_filename); } + + /* If disk-based RDB loading fails, remove the half-loaded dataset. */ + emptyData(-1, empty_db_flags, replicationEmptyDbCallback); + /* Note that there's no point in restarting the AOF on sync failure, it'll be restarted when sync succeeds or replica promoted. */ return; @@ -2249,6 +2240,7 @@ void readSyncBulkPayload(connection *conn) { } zfree(server.repl_transfer_tmpfile); + close(server.repl_transfer_fd); server.repl_transfer_fd = -1; server.repl_transfer_tmpfile = NULL; } @@ -3772,7 +3764,7 @@ void replicationCron(void) { * match the one stored into 'mf_master_offset' state. */ int manual_failover_in_progress = ((server.cluster_enabled && - server.cluster->mf_end) || + clusterManualFailoverTimeLimit()) || server.failover_end_time) && isPausedActionsWithUpdate(PAUSE_ACTION_REPLICA); @@ -4059,12 +4051,10 @@ void abortFailover(const char *err) { * will attempt forever and must be manually aborted. */ void failoverCommand(client *c) { - if (server.cluster_enabled) { - addReplyError(c,"FAILOVER not allowed in cluster mode. 
" - "Use CLUSTER FAILOVER command instead."); + if (!clusterAllowFailoverCmd(c)) { return; } - + /* Handle special case for abort */ if ((c->argc == 2) && !strcasecmp(c->argv[1]->ptr,"abort")) { if (server.failover_state == NO_FAILOVER) { diff --git a/src/resp_parser.c b/src/resp_parser.c index b92a74cffbf..e20e9c93c6c 100644 --- a/src/resp_parser.c +++ b/src/resp_parser.c @@ -1,30 +1,9 @@ /* - * Copyright (c) 2009-2021, Redis Labs Ltd. + * Copyright (c) 2009-Present, Redis Ltd. * All rights reserved. * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Redis nor the names of its contributors may be used - * to endorse or promote products derived from this software without - * specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. + * Licensed under your choice of the Redis Source Available License 2.0 + * (RSALv2) or the Server Side Public License v1 (SSPLv1). */ /* ---------------------------------------------------------------------------------------- diff --git a/src/resp_parser.h b/src/resp_parser.h index 0b5c8e22c9d..9ca5afa4e03 100644 --- a/src/resp_parser.h +++ b/src/resp_parser.h @@ -1,30 +1,9 @@ /* - * Copyright (c) 2021, Redis Labs Ltd. + * Copyright (c) 2021-Present, Redis Ltd. * All rights reserved. * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Redis nor the names of its contributors may be used - * to endorse or promote products derived from this software without - * specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. + * Licensed under your choice of the Redis Source Available License 2.0 + * (RSALv2) or the Server Side Public License v1 (SSPLv1). */ #ifndef SRC_RESP_PARSER_H_ diff --git a/src/rio.c b/src/rio.c index eaf88d25fc6..9398a3f78d0 100644 --- a/src/rio.c +++ b/src/rio.c @@ -16,7 +16,7 @@ * ---------------------------------------------------------------------------- * * Copyright (c) 2009-2012, Pieter Noordhuis - * Copyright (c) 2009-2012, Salvatore Sanfilippo + * Copyright (c) 2009-current, Redis Ltd. * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/src/rio.h b/src/rio.h index 9dd59d32b12..361d2004c4d 100644 --- a/src/rio.h +++ b/src/rio.h @@ -1,6 +1,6 @@ /* * Copyright (c) 2009-2012, Pieter Noordhuis - * Copyright (c) 2009-2019, Salvatore Sanfilippo + * Copyright (c) 2009-current, Redis Ltd. * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/src/script.c b/src/script.c index 6a798a6e143..a19304ab75f 100644 --- a/src/script.c +++ b/src/script.c @@ -1,36 +1,18 @@ /* - * Copyright (c) 2009-2021, Redis Ltd. + * Copyright (c) 2009-Present, Redis Ltd. * All rights reserved. 
* - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Redis nor the names of its contributors may be used - * to endorse or promote products derived from this software without - * specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. + * Licensed under your choice of the Redis Source Available License 2.0 + * (RSALv2) or the Server Side Public License v1 (SSPLv1). 
*/ #include "server.h" #include "script.h" #include "cluster.h" +#include <lua.h> +#include <lauxlib.h> + scriptFlag scripts_flags_def[] = { {.flag = SCRIPT_FLAG_NO_WRITES, .str = "no-writes"}, {.flag = SCRIPT_FLAG_ALLOW_OOM, .str = "allow-oom"}, @@ -60,6 +42,63 @@ static void enterScriptTimedoutMode(scriptRunCtx *run_ctx) { blockingOperationStarts(); } +#if defined(USE_JEMALLOC) +/* When lua uses jemalloc, pass in luaAlloc as a parameter of lua_newstate. */ +static void *luaAlloc(void *ud, void *ptr, size_t osize, size_t nsize) { + UNUSED(osize); + + unsigned int tcache = (unsigned int)(uintptr_t)ud; + if (nsize == 0) { + zfree_with_flags(ptr, MALLOCX_ARENA(server.lua_arena) | MALLOCX_TCACHE(tcache)); + return NULL; + } else { + return zrealloc_with_flags(ptr, nsize, MALLOCX_ARENA(server.lua_arena) | MALLOCX_TCACHE(tcache)); + } +} + +/* Create a lua interpreter, and use jemalloc as lua memory allocator. */ +lua_State *createLuaState(void) { + /* Every time a lua VM is created, a new private tcache is created for use. + * This private tcache will be destroyed after the lua VM is closed. */ + unsigned int tcache; + size_t sz = sizeof(unsigned int); + int err = je_mallctl("tcache.create", (void *)&tcache, &sz, NULL, 0); + if (err) { + serverLog(LL_WARNING, "Failed creating the lua jemalloc tcache."); + exit(1); + } + + /* We pass tcache as ud so that it is not bound to the server. */ + return lua_newstate(luaAlloc, (void *)(uintptr_t)tcache); +} + +/* Under jemalloc we need to create a new arena for lua to avoid blocking + * defragger. */ +void luaEnvInit(void) { + unsigned int arena; + size_t sz = sizeof(unsigned int); + int err = je_mallctl("arenas.create", (void *)&arena, &sz, NULL, 0); + if (err) { + serverLog(LL_WARNING, "Failed creating the lua jemalloc arena."); + exit(1); + } + server.lua_arena = arena; +} + +#else + +/* Create a lua interpreter and use glibc (default) as lua memory allocator. 
*/ +lua_State *createLuaState(void) { + return lua_open(); +} + +/* There is nothing to set up under glib. */ +void luaEnvInit(void) { + server.lua_arena = UINT_MAX; +} + +#endif + int scriptIsTimedout(void) { return scriptIsRunning() && (curr_run_ctx->flags & SCRIPT_TIMEDOUT); } @@ -209,6 +248,7 @@ int scriptPrepareForRun(scriptRunCtx *run_ctx, client *engine_client, client *ca run_ctx->c = engine_client; run_ctx->original_client = caller; run_ctx->funcname = funcname; + run_ctx->slot = caller->slot; client *script_client = run_ctx->c; client *curr_client = run_ctx->original_client; @@ -262,6 +302,8 @@ void scriptResetRun(scriptRunCtx *run_ctx) { unprotectClient(run_ctx->original_client); } + run_ctx->slot = -1; + preventCommandPropagation(run_ctx->original_client); /* unset curr_run_ctx so we will know there is no running script */ @@ -429,7 +471,7 @@ static int scriptVerifyClusterState(scriptRunCtx *run_ctx, client *c, client *or c->flags &= ~(CLIENT_READONLY | CLIENT_ASKING); c->flags |= original_c->flags & (CLIENT_READONLY | CLIENT_ASKING); int hashslot = -1; - if (getNodeByQuery(c, c->cmd, c->argv, c->argc, &hashslot, &error_code) != server.cluster->myself) { + if (getNodeByQuery(c, c->cmd, c->argv, c->argc, &hashslot, &error_code) != getMyClusterNode()) { if (error_code == CLUSTER_REDIR_DOWN_RO_STATE) { *err = sdsnew( "Script attempted to execute a write command while the " @@ -437,7 +479,22 @@ static int scriptVerifyClusterState(scriptRunCtx *run_ctx, client *c, client *or } else if (error_code == CLUSTER_REDIR_DOWN_STATE) { *err = sdsnew("Script attempted to execute a command while the " "cluster is down"); + } else if (error_code == CLUSTER_REDIR_CROSS_SLOT) { + *err = sdscatfmt(sdsempty(), + "Command '%S' in script attempted to access keys that don't hash to the same slot", + c->cmd->fullname); + } else if (error_code == CLUSTER_REDIR_UNSTABLE) { + /* The request spawns multiple keys in the same slot, + * but the slot is not "stable" currently as there 
is + * a migration or import in progress. */ + *err = sdscatfmt(sdsempty(), + "Unable to execute command '%S' in script " + "because undeclared keys were accessed during rehashing of the slot", + c->cmd->fullname); + } else if (error_code == CLUSTER_REDIR_DOWN_UNBOUND) { + *err = sdsnew("Script attempted to access a slot not served"); } else { + /* error_code == CLUSTER_REDIR_MOVED || error_code == CLUSTER_REDIR_ASK */ *err = sdsnew("Script attempted to access a non local key in a " "cluster node"); } @@ -448,14 +505,18 @@ static int scriptVerifyClusterState(scriptRunCtx *run_ctx, client *c, client *or * already been thrown. This is only checking for cross slot keys being accessed * that weren't pre-declared. */ if (hashslot != -1 && !(run_ctx->flags & SCRIPT_ALLOW_CROSS_SLOT)) { - if (original_c->slot == -1) { - original_c->slot = hashslot; - } else if (original_c->slot != hashslot) { + if (run_ctx->slot == -1) { + run_ctx->slot = hashslot; + } else if (run_ctx->slot != hashslot) { *err = sdsnew("Script attempted to access keys that do not hash to " "the same slot"); return C_ERR; } } + + c->slot = hashslot; + original_c->slot = hashslot; + return C_OK; } diff --git a/src/script.h b/src/script.h index c487165d66c..8d604e493de 100644 --- a/src/script.h +++ b/src/script.h @@ -1,30 +1,9 @@ /* - * Copyright (c) 2009-2021, Redis Ltd. + * Copyright (c) 2009-Present, Redis Ltd. * All rights reserved. * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. 
- * * Neither the name of Redis nor the names of its contributors may be used - * to endorse or promote products derived from this software without - * specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. + * Licensed under your choice of the Redis Source Available License 2.0 + * (RSALv2) or the Server Side Public License v1 (SSPLv1). */ #ifndef __SCRIPT_H_ @@ -74,6 +53,7 @@ struct scriptRunCtx { int flags; int repl_flags; monotime start_time; + int slot; }; /* Scripts flags */ @@ -92,6 +72,9 @@ typedef struct scriptFlag { extern scriptFlag scripts_flags_def[]; +void luaEnvInit(void); +lua_State *createLuaState(void); +dict *getLuaScripts(void); uint64_t scriptFlagsToCmdFlags(uint64_t cmd_flags, uint64_t script_flags); int scriptPrepareForRun(scriptRunCtx *r_ctx, client *engine_client, client *caller, const char *funcname, uint64_t script_flags, int ro); void scriptResetRun(scriptRunCtx *r_ctx); diff --git a/src/script_lua.c b/src/script_lua.c index 8cdd80523cc..4f325ba2279 100644 --- a/src/script_lua.c +++ b/src/script_lua.c @@ -1,30 +1,9 @@ /* - * Copyright (c) 2009-2021, Redis Ltd. + * Copyright (c) 2009-Present, Redis Ltd. * All rights reserved. 
* - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Redis nor the names of its contributors may be used - * to endorse or promote products derived from this software without - * specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. + * Licensed under your choice of the Redis Source Available License 2.0 + * (RSALv2) or the Server Side Public License v1 (SSPLv1). */ #include "script_lua.h" @@ -51,6 +30,7 @@ static char *libraries_allow_list[] = { "math", "table", "struct", + "os", NULL, }; @@ -602,7 +582,7 @@ static void luaReplyToRedisReply(client *c, client* script_client, lua_State *lu * to push 4 elements to the stack. On failure, return error. 
* Notice that we need, in the worst case, 4 elements because returning a map might * require push 4 elements to the Lua stack.*/ - addReplyErrorFormat(c, "reached lua stack limit"); + addReplyError(c, "reached lua stack limit"); lua_pop(lua,1); /* pop the element from the stack */ return; } @@ -818,8 +798,17 @@ static robj **luaArgsToRedisArgv(lua_State *lua, int *argc, int *argv_len) { /* We can't use lua_tolstring() for number -> string conversion * since Lua uses a format specifier that loses precision. */ lua_Number num = lua_tonumber(lua,j+1); - obj_len = fpconv_dtoa((double)num, dbuf); - dbuf[obj_len] = '\0'; + /* Integer printing function is much faster, check if we can safely use it. + * Since lua_Number is not explicitly an integer or a double, we need to make an effort + * to convert it as an integer when that's possible, since the string could later be used + * in a context that doesn't support scientific notation (e.g. 1e9 instead of 100000000). */ + long long lvalue; + if (double2ll((double)num, &lvalue)) + obj_len = ll2string(dbuf, sizeof(dbuf), lvalue); + else { + obj_len = fpconv_dtoa((double)num, dbuf); + dbuf[obj_len] = '\0'; + } obj_s = dbuf; } else { obj_s = (char*)lua_tolstring(lua,j+1,&obj_len); @@ -1169,7 +1158,7 @@ static int luaLogCommand(lua_State *lua) { } level = lua_tonumber(lua,-argc); if (level < LL_DEBUG || level > LL_WARNING) { - luaPushError(lua, "Invalid debug level."); + luaPushError(lua, "Invalid log level."); return luaError(lua); } if (level < server.verbosity) return 0; @@ -1232,6 +1221,7 @@ static void luaLoadLibraries(lua_State *lua) { luaLoadLib(lua, LUA_STRLIBNAME, luaopen_string); luaLoadLib(lua, LUA_MATHLIBNAME, luaopen_math); luaLoadLib(lua, LUA_DBLIBNAME, luaopen_debug); + luaLoadLib(lua, LUA_OSLIBNAME, luaopen_os); luaLoadLib(lua, "cjson", luaopen_cjson); luaLoadLib(lua, "struct", luaopen_struct); luaLoadLib(lua, "cmsgpack", luaopen_cmsgpack); @@ -1239,7 +1229,6 @@ static void luaLoadLibraries(lua_State *lua) { #if 0 
/* Stuff that we don't load currently, for sandboxing concerns. */ luaLoadLib(lua, LUA_LOADLIBNAME, luaopen_package); - luaLoadLib(lua, LUA_OSLIBNAME, luaopen_os); #endif } diff --git a/src/script_lua.h b/src/script_lua.h index 4c2b34804e5..d04ed4cab1b 100644 --- a/src/script_lua.h +++ b/src/script_lua.h @@ -1,30 +1,9 @@ /* - * Copyright (c) 2009-2021, Redis Ltd. + * Copyright (c) 2009-Present, Redis Ltd. * All rights reserved. * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Redis nor the names of its contributors may be used - * to endorse or promote products derived from this software without - * specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. 
+ * Licensed under your choice of the Redis Source Available License 2.0 + * (RSALv2) or the Server Side Public License v1 (SSPLv1). */ #ifndef __SCRIPT_LUA_H_ diff --git a/src/sds.c b/src/sds.c index e383e3caae7..53bafffe52c 100644 --- a/src/sds.c +++ b/src/sds.c @@ -1,41 +1,18 @@ /* SDSLib 2.0 -- A C dynamic strings library * - * Copyright (c) 2006-2015, Salvatore Sanfilippo - * Copyright (c) 2015, Oran Agra - * Copyright (c) 2015, Redis Labs, Inc + * Copyright (c) 2006-Present, Redis Ltd. * All rights reserved. * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Redis nor the names of its contributors may be used - * to endorse or promote products derived from this software without - * specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. + * Licensed under your choice of the Redis Source Available License 2.0 + * (RSALv2) or the Server Side Public License v1 (SSPLv1). */ #include #include #include #include -#include #include +#include "redisassert.h" #include "sds.h" #include "sdsalloc.h" #include "util.h" @@ -349,20 +326,22 @@ sds sdsResize(sds s, size_t size, int would_regrow) { * type. */ int use_realloc = (oldtype==type || (type < oldtype && type > SDS_TYPE_8)); size_t newlen = use_realloc ? oldhdrlen+size+1 : hdrlen+size+1; - int alloc_already_optimal = 0; - #if defined(USE_JEMALLOC) - /* je_nallocx returns the expected allocation size for the newlen. - * We aim to avoid calling realloc() when using Jemalloc if there is no - * change in the allocation size, as it incurs a cost even if the - * allocation size stays the same. */ - alloc_already_optimal = (je_nallocx(newlen, 0) == zmalloc_size(sh)); - #endif - - if (use_realloc && !alloc_already_optimal) { - newsh = s_realloc(sh, newlen); - if (newsh == NULL) return NULL; - s = (char*)newsh+oldhdrlen; - } else if (!alloc_already_optimal) { + + if (use_realloc) { + int alloc_already_optimal = 0; + #if defined(USE_JEMALLOC) + /* je_nallocx returns the expected allocation size for the newlen. + * We aim to avoid calling realloc() when using Jemalloc if there is no + * change in the allocation size, as it incurs a cost even if the + * allocation size stays the same. 
*/ + alloc_already_optimal = (je_nallocx(newlen, 0) == zmalloc_size(sh)); + #endif + if (!alloc_already_optimal) { + newsh = s_realloc(sh, newlen); + if (newsh == NULL) return NULL; + s = (char*)newsh+oldhdrlen; + } + } else { newsh = s_malloc(newlen); if (newsh == NULL) return NULL; memcpy((char*)newsh+hdrlen, s, len); diff --git a/src/sds.h b/src/sds.h index 208eaa210d9..bf31c7610b4 100644 --- a/src/sds.h +++ b/src/sds.h @@ -1,33 +1,10 @@ /* SDSLib 2.0 -- A C dynamic strings library * - * Copyright (c) 2006-2015, Salvatore Sanfilippo - * Copyright (c) 2015, Oran Agra - * Copyright (c) 2015, Redis Labs, Inc + * Copyright (c) 2006-Present, Redis Ltd. * All rights reserved. * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Redis nor the names of its contributors may be used - * to endorse or promote products derived from this software without - * specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. + * Licensed under your choice of the Redis Source Available License 2.0 + * (RSALv2) or the Server Side Public License v1 (SSPLv1). */ #ifndef __SDS_H diff --git a/src/sdsalloc.h b/src/sdsalloc.h index a1c5584f047..447cfbf4b62 100644 --- a/src/sdsalloc.h +++ b/src/sdsalloc.h @@ -1,32 +1,10 @@ /* SDSLib 2.0 -- A C dynamic strings library * - * Copyright (c) 2006-2015, Salvatore Sanfilippo - * Copyright (c) 2015, Redis Labs, Inc + * Copyright (c) 2006-Present, Redis Ltd. * All rights reserved. * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Redis nor the names of its contributors may be used - * to endorse or promote products derived from this software without - * specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. + * Licensed under your choice of the Redis Source Available License 2.0 + * (RSALv2) or the Server Side Public License v1 (SSPLv1). */ /* SDS allocator selection. diff --git a/src/sentinel.c b/src/sentinel.c index 238be905f6d..1d6c5659d9c 100644 --- a/src/sentinel.c +++ b/src/sentinel.c @@ -1,31 +1,10 @@ /* Redis Sentinel implementation * - * Copyright (c) 2009-2012, Salvatore Sanfilippo + * Copyright (c) 2009-Present, Redis Ltd. * All rights reserved. * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Redis nor the names of its contributors may be used - * to endorse or promote products derived from this software without - * specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. + * Licensed under your choice of the Redis Source Available License 2.0 + * (RSALv2) or the Server Side Public License v1 (SSPLv1). */ #include "server.h" @@ -4129,7 +4108,7 @@ NULL else if (!strcasecmp(c->argv[2]->ptr,"get") && c->argc >= 4) sentinelConfigGetCommand(c); else - addReplyError(c, "Only SENTINEL CONFIG GET [ ...]/ SET [ ...] are supported."); + addReplyError(c, "Only SENTINEL CONFIG GET [ ...] / SET [ ...] are supported."); } else if (!strcasecmp(c->argv[1]->ptr,"info-cache")) { /* SENTINEL INFO-CACHE */ if (c->argc < 2) goto numargserr; diff --git a/src/server.c b/src/server.c index 6815aac3b15..11646e25687 100644 --- a/src/server.c +++ b/src/server.c @@ -1,30 +1,9 @@ /* - * Copyright (c) 2009-2016, Salvatore Sanfilippo + * Copyright (c) 2009-Present, Redis Ltd. * All rights reserved. * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. 
- * * Neither the name of Redis nor the names of its contributors may be used - * to endorse or promote products derived from this software without - * specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. + * Licensed under your choice of the Redis Source Available License 2.0 + * (RSALv2) or the Server Side Public License v1 (SSPLv1). */ #include "server.h" @@ -38,12 +17,15 @@ #include "functions.h" #include "hdr_histogram.h" #include "syscheck.h" +#include "threads_mngr.h" +#include "fmtargs.h" +#include "mstr.h" +#include "ebuckets.h" #include #include #include #include -#include #include #include #include @@ -69,6 +51,12 @@ #include #endif +#ifdef __GNUC__ +#define GNUC_VERSION_STR STRINGIFY(__GNUC__) "." STRINGIFY(__GNUC_MINOR__) "." 
STRINGIFY(__GNUC_PATCHLEVEL__) +#else +#define GNUC_VERSION_STR "0.0.0" +#endif + /* Our shared "common" objects */ struct sharedObjectsStruct shared; @@ -123,11 +111,13 @@ void serverLogRaw(int level, const char *msg) { int off; struct timeval tv; int role_char; + int daylight_active = 0; pid_t pid = getpid(); gettimeofday(&tv,NULL); struct tm tm; - nolocks_localtime(&tm,tv.tv_sec,server.timezone,server.daylight_active); + atomicGet(server.daylight_active, daylight_active); + nolocks_localtime(&tm,tv.tv_sec,server.timezone,daylight_active); off = strftime(buf,sizeof(buf),"%d %b %Y %H:%M:%S.",&tm); snprintf(buf+off,sizeof(buf)-off,"%03d",(int)tv.tv_usec/1000); if (server.sentinel_mode) { @@ -160,13 +150,9 @@ void _serverLog(int level, const char *fmt, ...) { serverLogRaw(level,msg); } -/* Log a fixed message without printf-alike capabilities, in a way that is - * safe to call from a signal handler. - * - * We actually use this only for signals that are not fatal from the point - * of view of Redis. Signals that are going to kill the server anyway and - * where we need printf-alike features are served by serverLog(). */ -void serverLogFromHandler(int level, const char *msg) { +/* Low level logging from signal handler. Should be used with pre-formatted strings. + See serverLogFromHandler. */ +void serverLogRawFromHandler(int level, const char *msg) { int fd; int log_to_stdout = server.logfile[0] == '\0'; char buf[64]; @@ -176,18 +162,41 @@ void serverLogFromHandler(int level, const char *msg) { fd = log_to_stdout ? 
STDOUT_FILENO : open(server.logfile, O_APPEND|O_CREAT|O_WRONLY, 0644); if (fd == -1) return; - ll2string(buf,sizeof(buf),getpid()); - if (write(fd,buf,strlen(buf)) == -1) goto err; - if (write(fd,":signal-handler (",17) == -1) goto err; - ll2string(buf,sizeof(buf),time(NULL)); - if (write(fd,buf,strlen(buf)) == -1) goto err; - if (write(fd,") ",2) == -1) goto err; - if (write(fd,msg,strlen(msg)) == -1) goto err; - if (write(fd,"\n",1) == -1) goto err; + if (level & LL_RAW) { + if (write(fd,msg,strlen(msg)) == -1) goto err; + } + else { + ll2string(buf,sizeof(buf),getpid()); + if (write(fd,buf,strlen(buf)) == -1) goto err; + if (write(fd,":signal-handler (",17) == -1) goto err; + ll2string(buf,sizeof(buf),time(NULL)); + if (write(fd,buf,strlen(buf)) == -1) goto err; + if (write(fd,") ",2) == -1) goto err; + if (write(fd,msg,strlen(msg)) == -1) goto err; + if (write(fd,"\n",1) == -1) goto err; + } err: if (!log_to_stdout) close(fd); } +/* An async-signal-safe version of serverLog. if LL_RAW is not included in level flags, + * The message format is: :signal-handler (