Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix cluster discovery process. #261

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,7 @@ public void createChannel(UUID previousId, InetSocketAddress candidate) {

if (this.loadServerFeatures()) {
this.channelId = UUID.randomUUID();
this.connection.confirmChannel();
logger.info("Connection to endpoint [{}] created successfully", this.connection.getLastConnectedEndpoint());
break;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,12 @@ class ConnectionState {
private InetSocketAddress previous;
private ManagedChannel currentChannel;

// Indicates whether the current channel has passed all the connection prerequisites required before the user can use it.
// A non-exhaustive list of those prerequisites:
// * We managed to get a gossip seed from the channel.
// * We managed to read the server features (if that failed with a not-found error, it is not fatal; the node is just running an old version).
private boolean confirmedChannel;

ConnectionState(EventStoreDBClientSettings settings) {
this.settings = settings;

Expand All @@ -46,7 +52,11 @@ class ConnectionState {
}

InetSocketAddress getLastConnectedEndpoint() {
return this.previous;
return this.confirmedChannel ? this.previous : null;
}

/**
 * Marks the current channel as confirmed by setting the {@code confirmedChannel} flag to {@code true}.
 */
void confirmChannel() {
this.confirmedChannel = true;
}

ManagedChannel getCurrentChannel() {
Expand Down Expand Up @@ -82,6 +92,7 @@ void connect(InetSocketAddress addr) {
builder.keepAliveTime(settings.getKeepAliveInterval(), TimeUnit.MILLISECONDS);

this.currentChannel = builder.build();
this.confirmedChannel = false;
this.previous = addr;
}

Expand All @@ -108,5 +119,6 @@ public void shutdown() {

/**
 * Resets the connection bookkeeping: the channel is no longer considered
 * confirmed and the previously recorded endpoint is forgotten.
 */
public void clear() {
    // Drop the confirmation first, then the remembered endpoint; the two
    // assignments are independent of each other.
    this.confirmedChannel = false;
    this.previous = null;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ public class EventStoreDBClientBase {
EventStoreDBClientBase(EventStoreDBClientSettings settings) {
Discovery discovery;

if (settings.getHosts().length == 1) {
if (settings.getHosts().length == 1 && !settings.isDnsDiscover()) {
discovery = new SingleNodeDiscovery(settings.getHosts()[0]);
} else {
discovery = new ClusterDiscovery(settings);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,20 @@ public synchronized EventStoreDBClient getDefaultClient(Database database) {
continue;
}

// On rare occasions, GHA can take much longer than usual to set up a cluster
// through docker compose. In that case, we recreate a fresh client once we have exhausted
// all discovery attempts and the connection got closed.
if (e.getCause() instanceof ConnectionShutdownException && (settings.isDnsDiscover() || settings.getHosts().length > 1)) {
logger.debug("Seems we exhausted all discovery attempts. Unusual but maybe docker is slow");
try {
Thread.sleep(500);
} catch (InterruptedException ex) {
throw new RuntimeException(ex);
}
defaultClient = EventStoreDBClient.create(settings);
continue;
}

throw new RuntimeException(e);
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,8 @@ public ExternallyCreatedCluster(boolean secure) {
public ConnectionSettingsBuilder defaultSettingsBuilder() {
return EventStoreDBClientSettings
.builder()
.dnsDiscover(true)
.defaultCredentials("admin", "changeit")
.addHost("localhost", 2_111)
.addHost("localhost", 2_112)
.addHost("localhost", 2_113)
.tls(secure)
.tlsVerifyCert(false)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
import java.util.concurrent.ExecutionException;

public interface DeadlineTests extends ConnectionAware {
@Test
@RetryingTest(10)
default void testDefaultDeadline() throws Throwable {
EventStoreDBClient client = getDatabase().connectWith(opts ->
opts.defaultDeadline(1)
Expand Down
128 changes: 60 additions & 68 deletions docker-compose.yml
Original file line number Diff line number Diff line change
@@ -1,106 +1,98 @@
version: '3.5'
version: "3.5"

services:
volumes-provisioner:
image: "hasnat/volumes-provisioner"
image: hasnat/volumes-provisioner
environment:
PROVISION_DIRECTORIES: "1000:1000:0755:/tmp/certs"
volumes:
- "./certs:/tmp/certs"
network_mode: "none"
network_mode: none

setup:
cert-gen:
image: eventstore/es-gencert-cli:1.0.2
entrypoint: bash
user: "1000:1000"
command: >
-c "mkdir -p ./certs && cd /certs
&& es-gencert-cli create-ca
&& es-gencert-cli create-node -out ./node1 --dns-names node1.eventstore
&& es-gencert-cli create-node -out ./node2 --dns-names node2.eventstore
&& es-gencert-cli create-node -out ./node3 --dns-names node3.eventstore
&& es-gencert-cli create-node -out ./node1 -ip-addresses 127.0.0.1,172.30.240.11 -dns-names localhost
&& es-gencert-cli create-node -out ./node2 -ip-addresses 127.0.0.1,172.30.240.12 -dns-names localhost
&& es-gencert-cli create-node -out ./node3 -ip-addresses 127.0.0.1,172.30.240.13 -dns-names localhost
&& find . -type f -print0 | xargs -0 chmod 666"
container_name: setup
volumes:
- ./certs:/certs
- "./certs:/certs"
depends_on:
- volumes-provisioner

node1.eventstore: &template
esdb-node1:
image: ghcr.io/eventstore/eventstore:${CONTAINER_IMAGE_VERSION:-latest}
container_name: node1.eventstore
env_file:
- vars.env
environment:
- EVENTSTORE_EXT_HOST_ADVERTISE_AS=node1.eventstore
- EVENTSTORE_INT_HOST_ADVERTISE_AS=node1.eventstore
- EVENTSTORE_GOSSIP_SEED=node2.eventstore:2113,node3.eventstore:2113
- EVENTSTORE_CERTIFICATE_FILE=/certs/node1/node.crt
- EVENTSTORE_CERTIFICATE_PRIVATE_KEY_FILE=/certs/node1/node.key
- EVENTSTORE_GOSSIP_SEED=172.30.240.12:2113,172.30.240.13:2113
- EVENTSTORE_INT_IP=172.30.240.11
- EVENTSTORE_CERTIFICATE_FILE=/etc/eventstore/certs/node1/node.crt
- EVENTSTORE_CERTIFICATE_PRIVATE_KEY_FILE=/etc/eventstore/certs/node1/node.key
- EVENTSTORE_ADVERTISE_HTTP_PORT_TO_CLIENT_AS=2111
- EVENTSTORE_ADVERTISE_TCP_PORT_TO_CLIENT_AS=1111
healthcheck:
test:
[
'CMD-SHELL',
'curl --fail --insecure https://node1.eventstore:2113/health/live || exit 1',
]
interval: 5s
timeout: 5s
retries: 24
ports:
- 1111:1113
- 2111:2113
networks:
clusternetwork:
ipv4_address: 172.30.240.11
volumes:
- ./certs:/certs
- ./certs:/etc/eventstore/certs
restart: unless-stopped
depends_on:
- setup
restart: always
- cert-gen

node2.eventstore:
<<: *template
container_name: node2.eventstore
esdb-node2:
image: ghcr.io/eventstore/eventstore:${CONTAINER_IMAGE_VERSION:-latest}
env_file:
- vars.env
environment:
- EVENTSTORE_EXT_HOST_ADVERTISE_AS=node2.eventstore
- EVENTSTORE_INT_HOST_ADVERTISE_AS=node2.eventstore
- EVENTSTORE_GOSSIP_SEED=node1.eventstore:2113,node3.eventstore:2113
- EVENTSTORE_CERTIFICATE_FILE=/certs/node2/node.crt
- EVENTSTORE_CERTIFICATE_PRIVATE_KEY_FILE=/certs/node2/node.key
- EVENTSTORE_GOSSIP_SEED=172.30.240.11:2113,172.30.240.13:2113
- EVENTSTORE_INT_IP=172.30.240.12
- EVENTSTORE_CERTIFICATE_FILE=/etc/eventstore/certs/node2/node.crt
- EVENTSTORE_CERTIFICATE_PRIVATE_KEY_FILE=/etc/eventstore/certs/node2/node.key
- EVENTSTORE_ADVERTISE_HTTP_PORT_TO_CLIENT_AS=2112
- EVENTSTORE_ADVERTISE_TCP_PORT_TO_CLIENT_AS=1112
healthcheck:
test:
[
'CMD-SHELL',
'curl --fail --insecure https://node2.eventstore:2113/health/live || exit 1',
]
interval: 5s
timeout: 5s
retries: 24
ports:
- 1112:1113
- 2112:2113
networks:
clusternetwork:
ipv4_address: 172.30.240.12
volumes:
- ./certs:/etc/eventstore/certs
restart: unless-stopped
depends_on:
- cert-gen

node3.eventstore:
<<: *template
container_name: node3.eventstore
esdb-node3:
image: ghcr.io/eventstore/eventstore:${CONTAINER_IMAGE_VERSION:-latest}
env_file:
- vars.env
environment:
- EVENTSTORE_EXT_HOST_ADVERTISE_AS=node3.eventstore
- EVENTSTORE_INT_HOST_ADVERTISE_AS=node3.eventstore
- EVENTSTORE_GOSSIP_SEED=node1.eventstore:2113,node2.eventstore:2113
- EVENTSTORE_CERTIFICATE_FILE=/certs/node3/node.crt
- EVENTSTORE_CERTIFICATE_PRIVATE_KEY_FILE=/certs/node3/node.key
- EVENTSTORE_GOSSIP_SEED=172.30.240.11:2113,172.30.240.12:2113
- EVENTSTORE_INT_IP=172.30.240.13
- EVENTSTORE_CERTIFICATE_FILE=/etc/eventstore/certs/node3/node.crt
- EVENTSTORE_CERTIFICATE_PRIVATE_KEY_FILE=/etc/eventstore/certs/node3/node.key
- EVENTSTORE_ADVERTISE_HTTP_PORT_TO_CLIENT_AS=2113
- EVENTSTORE_ADVERTISE_TCP_PORT_TO_CLIENT_AS=1113
healthcheck:
test:
[
'CMD-SHELL',
'curl --fail --insecure https://node3.eventstore:2113/health/live || exit 1',
]
interval: 5s
timeout: 5s
retries: 24
ports:
- 1113:1113
- 2113:2113
networks:
clusternetwork:
ipv4_address: 172.30.240.13
volumes:
- ./certs:/etc/eventstore/certs
restart: unless-stopped
depends_on:
- cert-gen

networks:
clusternetwork:
name: eventstoredb.local
driver: bridge
ipam:
driver: default
config:
- subnet: 172.30.240.0/24
7 changes: 4 additions & 3 deletions vars.env
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
EVENTSTORE_CLUSTER_SIZE=3
EVENTSTORE_RUN_PROJECTIONS=All
EVENTSTORE_TRUSTED_ROOT_CERTIFICATES_PATH=/certs/ca
EVENTSTORE_INT_TCP_PORT=1112
EVENTSTORE_HTTP_PORT=2113
EVENTSTORE_TRUSTED_ROOT_CERTIFICATES_PATH=/etc/eventstore/certs/ca
EVENTSTORE_DISCOVER_VIA_DNS=false
EVENTSTORE_ENABLE_EXTERNAL_TCP=true
EVENTSTORE_ENABLE_ATOM_PUB_OVER_HTTP=true
EVENTSTORE_ADVERTISE_HOST_TO_CLIENT_AS=localhost
EVENTSTORE_ADVERTISE_HOST_TO_CLIENT_AS=localhost
Loading