Skip to content

Commit

Permalink
Docker: Disable the consumer timeout for RabbitMQ (#6189)
Browse files Browse the repository at this point in the history
As of RabbitMQ v3.8.15, `consumer_timeout` defaults to 30 minutes.
If a task is not acknowledged within this time limit, the consumer of the
task is considered dead and its tasks are rescheduled. This is problematic
for AiiDA, since tasks often take multiple hours.

The `consumer_timeout` can only be changed through the server config.
Here we disable it through the `advanced.config`.

Cherry-pick: 33dffb0
  • Loading branch information
unkcpz authored and sphuber committed Nov 28, 2023
1 parent 90e586f commit 5ce1e7e
Show file tree
Hide file tree
Showing 3 changed files with 35 additions and 11 deletions.
11 changes: 9 additions & 2 deletions .docker/aiida-core-with-services/s6-assets/init/rabbitmq-init.sh
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,15 @@ echo MNESIA_BASE="${RABBITMQ_DATA_DIR}" >> "${RMQ_ETC_DIR}/rabbitmq-env.conf"
echo LOG_BASE="${RABBITMQ_DATA_DIR}/log" >> "${RMQ_ETC_DIR}/rabbitmq-env.conf"

# using workaround from https://github.com/aiidateam/aiida-core/wiki/RabbitMQ-version-to-use
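# set timeout to 100 hours (NOTE: consumer_timeout is in milliseconds, so 3600000 is actually 1 hour; 100 hours would be 360000000)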
echo "consumer_timeout=3600000" >> "${RMQ_ETC_DIR}/rabbitmq.conf"
# setting the consumer_timeout to undefined disables the timeout
cat > "${RMQ_ETC_DIR}/advanced.config" <<EOF
%% advanced.config
[
{rabbit, [
{consumer_timeout, undefined}
]}
].
EOF

# Explicitly define the node name. This is necessary because the mnesia subdirectory contains the hostname, which by
# default is set to the value of $(hostname -s), which for docker containers, will be a random hexadecimal string. Upon
Expand Down
20 changes: 16 additions & 4 deletions .docker/tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
# pylint: disable=missing-docstring, redefined-outer-name
import json
from pathlib import Path
import time

import pytest

Expand All @@ -22,10 +23,16 @@ def docker_compose(docker_services):
return docker_services._docker_compose


@pytest.fixture
def timeout():
"""Container and service startup timeout"""
return 30
def is_container_ready(docker_compose):
output = docker_compose.execute('exec -T aiida verdi status').decode().strip()
return 'Connected to RabbitMQ' in output and 'Daemon is running' in output


@pytest.fixture(scope='session', autouse=True)
def _docker_service_wait(docker_services):
"""Container startup wait."""

time.sleep(30)


@pytest.fixture
Expand Down Expand Up @@ -59,3 +66,8 @@ def python_version(_build_config):
@pytest.fixture(scope='session')
def pgsql_version(_build_config):
return _build_config['PGSQL_VERSION']['default']


@pytest.fixture(scope='session')
def rmq_version(_build_config):
return _build_config['RMQ_VERSION']['default']
15 changes: 10 additions & 5 deletions .docker/tests/test_aiida.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
# -*- coding: utf-8 -*-
# pylint: disable=missing-docstring
import json
import time

from packaging.version import parse
import pytest
Expand All @@ -22,8 +21,15 @@ def test_correct_pgsql_version_installed(aiida_exec, pgsql_version, variant):
assert parse(info['version']).major == parse(pgsql_version).major


def test_verdi_status(aiida_exec, container_user, timeout):
time.sleep(timeout)
def test_rmq_consumer_timeout_unset(aiida_exec, variant):
if variant == 'aiida-core-base':
pytest.skip('RabbitMQ is not installed in the base image')

output = aiida_exec('rabbitmqctl environment | grep consumer_timeout', user='root').decode().strip()
assert 'undefined' in output


def test_verdi_status(aiida_exec, container_user):
output = aiida_exec('verdi status', user=container_user).decode().strip()
assert 'Connected to RabbitMQ' in output
assert 'Daemon is running' in output
Expand All @@ -32,8 +38,7 @@ def test_verdi_status(aiida_exec, container_user, timeout):
assert 'Warning' not in output


def test_computer_setup_success(aiida_exec, container_user, timeout):
time.sleep(timeout)
def test_computer_setup_success(aiida_exec, container_user):
output = aiida_exec('verdi computer test localhost', user=container_user).decode().strip()

assert 'Success' in output
Expand Down

0 comments on commit 5ce1e7e

Please sign in to comment.