Skip to content

Commit

Permalink
Merge branch 'w/2.8/improvement/ZENKO-4919' into w/2.9/improvement/ZE…
Browse files Browse the repository at this point in the history
  • Loading branch information
francoisferrand committed Nov 18, 2024
2 parents 1f1f381 + 1add1a4 commit c18f581
Show file tree
Hide file tree
Showing 11 changed files with 93 additions and 47 deletions.
13 changes: 8 additions & 5 deletions .github/actions/archive-artifacts/action.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -74,26 +74,29 @@ runs:
set -exu
KAFKA=$(kubectl get pods -n ${NAMESPACE} -lkafka_cr=${ZENKO_NAME}-base-queue -o jsonpath='{.items[0].metadata.name}')
KAFKA_PATH="/tmp/artifacts/data/${STAGE}/kafka"
mkdir -p ${KAFKA_PATH}
kubectl exec -in ${NAMESPACE} ${KAFKA} -c kafka -- \
env KAFKA_OPTS= kafka-topics.sh --bootstrap-server :9092 --list \
> /tmp/artifacts/data/${STAGE}/kafka-topics.log
> ${KAFKA_PATH}/kafka-topics.log
kubectl exec -in ${NAMESPACE} ${KAFKA} -c kafka -- \
env KAFKA_OPTS= kafka-consumer-groups.sh --bootstrap-server :9092 --list \
> /tmp/artifacts/data/${STAGE}/kafka-consumer-groups.log
> ${KAFKA_PATH}/kafka-consumer-groups.log
kubectl exec -in ${NAMESPACE} ${KAFKA} -c kafka -- \
env KAFKA_OPTS= kafka-consumer-groups.sh --bootstrap-server :9092 --describe --all-groups \
> /tmp/artifacts/data/${STAGE}/kafka-offsets.log
> ${KAFKA_PATH}/kafka-offsets.log
KAFKA_SERVICE=$(kubectl get services -n ${NAMESPACE} -lkafka_cr=${ZENKO_NAME}-base-queue -o jsonpath='{.items[0].metadata.name}')
kubectl run -n ${NAMESPACE} kcat --image=edenhill/kcat:1.7.1 --restart=Never --command -- sleep 300
kubectl wait -n ${NAMESPACE} pod kcat --for=condition=ready
cat /tmp/artifacts/data/${STAGE}/kafka-topics.log | grep -v '^__' | xargs -P 15 -I {} \
cat ${KAFKA_PATH}/kafka-topics.log | grep -v '^__' | xargs -P 15 -I {} \
sh -c "kubectl exec -i -n ${NAMESPACE} kcat -- \
kcat -L -b ${KAFKA_SERVICE} -t {} -C -o beginning -e -q -J \
> /tmp/artifacts/data/${STAGE}/kafka-messages-{}.log"
> ${KAFKA_PATH}/kafka-messages-{}.log"
env:
STAGE: ${{ inputs.stage }}
NAMESPACE: ${{ inputs.zenko-namespace }}
Expand Down
5 changes: 3 additions & 2 deletions .github/actions/deploy/action.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -64,9 +64,10 @@ runs:
docker pull ${OPERATOR_IMAGE_NAME}:${OPERATOR_IMAGE_TAG}
kind load docker-image ${OPERATOR_IMAGE_NAME}:${OPERATOR_IMAGE_TAG}
cd ./.github/scripts/end2end
git clone https://${GIT_ACCESS_TOKEN}@github.com/scality/zenko-operator.git operator
git init operator
cd operator
git checkout ${OPERATOR_IMAGE_TAG}
git fetch --depth 1 --no-tags https://${GIT_ACCESS_TOKEN}@github.com/scality/zenko-operator.git ${OPERATOR_IMAGE_TAG}
git checkout FETCH_HEAD
tilt ci
env:
OPERATOR_IMAGE_TAG: ${{ inputs.zkop_tag }}
Expand Down
6 changes: 6 additions & 0 deletions .github/scripts/end2end/configs/prometheus.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -36,13 +36,19 @@ spec:
evaluationInterval: 30s
logFormat: logfmt
logLevel: info
serviceMonitorNamespaceSelector: {}
serviceMonitorSelector:
matchLabels:
metalk8s.scality.com/monitor: ""
podMonitorNamespaceSelector: {}
podMonitorSelector:
matchLabels:
metalk8s.scality.com/monitor: ""
probeNamespaceSelector: {}
probeSelector:
matchLabels:
metalk8s.scality.com/monitor: ""
ruleNamespaceSelector: {}
ruleSelector:
matchLabels:
metalk8s.scality.com/monitor: ""
23 changes: 15 additions & 8 deletions .github/scripts/end2end/install-kind-dependencies.sh
Original file line number Diff line number Diff line change
Expand Up @@ -58,14 +58,21 @@ kubectl rollout status -n ingress-nginx deployment/ingress-nginx-controller --ti

# cert-manager
kubectl apply --validate=false -f https://github.com/jetstack/cert-manager/releases/download/${CERT_MANAGER_VERSION}/cert-manager.yaml --wait
# kubectl apply --validate=false -f - <<EOF
# apiVersion: cert-manager.io/v1
# kind: ClusterIssuer
# metadata:
# name: artesca-root-ca-issuer
# spec:
# selfSigned: {}
# EOF
kubectl rollout status -n cert-manager deployment/cert-manager-webhook --timeout=10m

# Create the self-signed ClusterIssuer, retrying on failure: up to 20 attempts,
# pausing 1 second between attempts.
# NOTE(review): presumably the retry is needed because the cert-manager webhook
# may still reject requests shortly after its rollout reports ready - confirm.
retries=20
until kubectl apply -f - <<EOF
apiVersion: cert-manager.io/v1
kind: ClusterIssuer
metadata:
name: artesca-root-ca-issuer
spec:
selfSigned: {}
EOF
do
# ((--retries)) returns a non-zero status once the counter reaches 0, which
# triggers the failure branch and aborts the script with exit code 1.
((--retries)) || { echo "Failed to create ClusterIssuer"; exit 1; }
sleep 1
done

# prometheus
# last-applied-configuration can end up larger than 256kB which is too large for an annotation
Expand Down
1 change: 1 addition & 0 deletions .github/scripts/end2end/patch-coredns.sh
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ corefile="
rewrite name exact s3.dr.zenko.local ingress-nginx-controller.ingress-nginx.svc.cluster.local
rewrite name exact sts.dr.zenko.local ingress-nginx-controller.ingress-nginx.svc.cluster.local
rewrite name exact iam.dr.zenko.local ingress-nginx-controller.ingress-nginx.svc.cluster.local
rewrite name exact prom.dr.zenko.local ingress-nginx-controller.ingress-nginx.svc.cluster.local
rewrite name exact shell-ui.dr.zenko.local ingress-nginx-controller.ingress-nginx.svc.cluster.local
rewrite name exact website.mywebsite.com ingress-nginx-controller.ingress-nginx.svc.cluster.local
kubernetes cluster.local in-addr.arpa ip6.arpa {
Expand Down
9 changes: 5 additions & 4 deletions .github/workflows/alerts.yaml
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
name: Test alerts

on:
push:
branches-ignore:
- 'development/**'
- 'q/*'
workflow_call:
secrets:
GIT_ACCESS_TOKEN:
description: 'GitHub token'
required: true

jobs:
run-alert-tests:
Expand Down
15 changes: 10 additions & 5 deletions .github/workflows/end2end.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,10 @@ jobs:
- name: Verify monitoring dashboard versions
run: bash ./.github/scripts/check_versions.sh

check-alerts:
uses: ./.github/workflows/alerts.yaml
secrets: inherit

check-workflows:
runs-on: ubuntu-22.04
steps:
Expand Down Expand Up @@ -388,7 +392,7 @@ jobs:
cache-to: type=gha,mode=max,scope=end2end-ctst

end2end-http:
needs: [build-kafka, build-test-image, check-dashboard-versions]
needs: [build-kafka, build-test-image]
runs-on:
- ubuntu
- focal
Expand Down Expand Up @@ -437,7 +441,7 @@ jobs:
run: kind delete cluster

end2end-pra:
needs: [build-kafka, check-dashboard-versions, lint-and-build-ctst]
needs: [build-kafka, lint-and-build-ctst]
runs-on: ubuntu-22.04-16core
env:
GIT_ACCESS_TOKEN: ${{ secrets.GIT_ACCESS_TOKEN }}
Expand Down Expand Up @@ -497,7 +501,7 @@ jobs:
run: kind delete cluster

end2end-https:
needs: [build-kafka, build-test-image, check-dashboard-versions]
needs: [build-kafka, build-test-image]
runs-on:
- ubuntu
- focal
Expand Down Expand Up @@ -549,7 +553,7 @@ jobs:
run: kind delete cluster

end2end-sharded:
needs: [build-kafka, build-test-image, check-dashboard-versions]
needs: [build-kafka, build-test-image]
runs-on:
- ubuntu-22.04-8core
# Enable this for Ring-based tests
Expand Down Expand Up @@ -589,7 +593,7 @@ jobs:
run: kind delete cluster

ctst-end2end-sharded:
needs: [build-kafka, lint-and-build-ctst, check-dashboard-versions]
needs: [build-kafka, lint-and-build-ctst]
runs-on:
- ubuntu-22.04-8core
steps:
Expand Down Expand Up @@ -638,6 +642,7 @@ jobs:
write-final-status:
runs-on: ubuntu-latest
needs:
- check-alerts
- check-dashboard-versions
- check-workflows
- build-doc
Expand Down
6 changes: 3 additions & 3 deletions solution/deps.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ cloudserver:
sourceRegistry: ghcr.io/scality
dashboard: cloudserver/cloudserver-dashboards
image: cloudserver
tag: 8.8.35
tag: 8.8.36
envsubst: CLOUDSERVER_TAG
drctl:
sourceRegistry: ghcr.io/scality
Expand Down Expand Up @@ -113,7 +113,7 @@ sorbet:
policy: sorbet/sorbet-policies
dashboard: sorbet/sorbet-dashboards
image: sorbet
tag: v1.1.12
tag: v1.1.13
envsubst: SORBET_TAG
stern: # tail any pod logs with pattern matching
tag: 1.30.0
Expand All @@ -136,7 +136,7 @@ vault:
zenko-operator:
sourceRegistry: ghcr.io/scality
image: zenko-operator
tag: v1.6.4
tag: v1.6.5
envsubst: ZENKO_OPERATOR_TAG
zenko-ui:
sourceRegistry: ghcr.io/scality
Expand Down
3 changes: 2 additions & 1 deletion tests/ctst/features/pra.feature
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,8 @@ Feature: PRA operations
Given a DR installed
Then the DR source should be in phase "Running"
And the DR sink should be in phase "Running"
Then the kafka DR volume exists
And the kafka DR volume exists
And prometheus should scrap federated metrics from DR sink

# Check that objects are transitioned in the DR site
Given access keys for the replicated account
Expand Down
25 changes: 21 additions & 4 deletions tests/ctst/steps/pra.ts
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ import {
} from 'steps/utils/utils';
import { CacheHelper, Constants, Identity, IdentityEnum, SuperAdmin, Utils } from 'cli-testing';
import { safeJsonParse } from 'common/utils';
import { PrometheusDriver } from 'prometheus-query';
import assert from 'assert';
import { EntityType } from 'world/Zenko';

Expand Down Expand Up @@ -70,9 +71,8 @@ async function installPRA(world: Zenko, sinkS3Endpoint = 'http://s3.zenko.local'
sourceS3Endpoint: 'http://s3.zenko.local',
sinkS3Endpoint,
prometheusService: world.parameters.PrometheusService,
// prometheusHostname: 'prom.dr.zenko.local', // could be any name, cert will be auto-generated
prometheusHostname: 'prom.dr.zenko.local',
prometheusExternalIpsDiscovery: true,
prometheusDisableTls: true,
forceRotateServiceCredentials: (CacheHelper.savedAcrossTests[Zenko.PRA_INSTALL_COUNT_KEY] as number) > 0,
...kafkaExternalIpOption,
timeout,
Expand Down Expand Up @@ -328,8 +328,25 @@ Then('the kafka DR volume exists', { timeout: volumeTimeout + 2000 }, async func
assert(volumeParsed.result!['volume phase'] === 'Bound');
});

// Step: polls Prometheus until at least one series carrying the
// drSinkInstance="end2end-pra-sink" label is returned.
// The loop has no exit of its own other than success.
// NOTE(review): presumably the 70000 step timeout is what fails the step when
// no such series ever appears - confirm against the test runner's semantics.
Then('prometheus should scrap federated metrics from DR sink', { timeout: 70000 }, async function (this: Zenko) {
// Client for the Prometheus HTTP API exposed by the service named in the
// world parameters, on port 9090.
const prom = new PrometheusDriver({
endpoint: `http://${this.parameters.PrometheusService}:9090`,
baseURL: '/api/v1',
});

for (;;) {
// Search window: the 60 seconds preceding the current time.
const t = Date.now();
const metrics = await prom.series('{drSinkInstance="end2end-pra-sink"}', t - 60 * 1000, t);
if (metrics.length > 0) {
break;
}

// Pause before polling again (presumably 1000 ms - verify Utils.sleep units).
await Utils.sleep(1000);
}
});

const failoverTimeout = 360000;
When ('I request the failover state for the DR', { timeout: failoverTimeout + 2000 }, async function (this: Zenko) {
When('I request the failover state for the DR', { timeout: failoverTimeout + 2000 }, async function (this: Zenko) {
await this.zenkoDrCtl?.failover({
sinkZenkoDrNamespace: 'default',
sinkZenkoDrInstance: 'end2end-pra-sink',
Expand All @@ -339,7 +356,7 @@ When ('I request the failover state for the DR', { timeout: failoverTimeout + 20
});

const failbackTimeout = 360000;
When ('I resume operations for the DR', { timeout: failbackTimeout + 2000 }, async function (this: Zenko) {
When('I resume operations for the DR', { timeout: failbackTimeout + 2000 }, async function (this: Zenko) {
await this.zenkoDrCtl?.failback({
sinkZenkoDrNamespace: 'default',
sinkZenkoDrInstance: 'end2end-pra-sink',
Expand Down
34 changes: 19 additions & 15 deletions tests/zenko_tests/node_tests/backbeat/ReplicationUtility.js
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
const assert = require('assert');
const crypto = require('crypto');
const async = require('async');
const { jsutil } = require('arsenal');

const { scalityS3Client, awsS3Client } = require('../s3SDK');

Expand Down Expand Up @@ -589,28 +590,28 @@ class ReplicationUtility {

// Continue getting head object while the status is PENDING or PROCESSING.
waitUntilReplicated(bucketName, key, versionId, cb) {
let status;
return async.doWhilst(
callback => this.s3.headObject({
Bucket: bucketName,
Key: key,
VersionId: versionId,
}, (err, data) => {
const cbOnce = jsutil.once(callback);
if (err) {
return callback(err);
return cbOnce(err);
}
status = data.ReplicationStatus;
const status = data.ReplicationStatus;
assert.notStrictEqual(
status,
'FAILED',
`Unexpected CRR failure occurred: ${JSON.stringify(data)}`,
);
if (status === 'PENDING' || status === 'PROCESSING') {
return setTimeout(callback, 2000);
return setTimeout(() => cbOnce(null, status), 2000);
}
return callback();
return cbOnce(null, status);
}),
() => (status === 'PENDING' || status === 'PROCESSING'),
status => (status === 'PENDING' || status === 'PROCESSING'),
cb,
);
}
Expand All @@ -622,14 +623,15 @@ class ReplicationUtility {
const expectedCode = client === 'azure' ? 'BlobNotFound' : 'NoSuchKey';
return async.doWhilst(
callback => this[method](bucketName, key, err => {
const cbOnce = jsutil.once(callback);
if (err && err.code !== expectedCode) {
return callback(err);
return cbOnce(err);
}
objectExists = err === null;
if (!objectExists) {
return callback();
return cbOnce();
}
return setTimeout(callback, 2000);
return setTimeout(cbOnce, 2000);
}),
() => objectExists,
cb,
Expand All @@ -644,8 +646,9 @@ class ReplicationUtility {
Bucket: bucketName,
Key: key,
}, (err, data) => {
const cbOnce = jsutil.once(callback);
if (err) {
return callback(err);
return cbOnce(err);
}
const statuses = [];
// We cannot rely on the global status for one-to-many, so check
Expand All @@ -657,9 +660,9 @@ class ReplicationUtility {
});
shouldContinue = statuses.includes('PENDING');
if (shouldContinue) {
return setTimeout(callback, 2000);
return setTimeout(cbOnce, 2000);
}
return callback();
return cbOnce();
}),
() => shouldContinue,
cb,
Expand All @@ -674,14 +677,15 @@ class ReplicationUtility {
Bucket: bucketName,
Key: key,
}, (err, data) => {
const cbOnce = jsutil.once(callback);
if (err) {
return callback(err);
return cbOnce(err);
}
shouldContinue = data.ReplicationStatus === 'FAILED';
if (shouldContinue) {
return setTimeout(callback, 2000);
return setTimeout(cbOnce, 2000);
}
return callback();
return cbOnce();
}),
() => shouldContinue,
cb,
Expand Down

0 comments on commit c18f581

Please sign in to comment.