diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml
index 335a060f8..95a05c99d 100644
--- a/.github/workflows/ci.yaml
+++ b/.github/workflows/ci.yaml
@@ -33,4 +33,14 @@ jobs:
     runs-on: ubuntu-18.04
     steps:
       - uses: actions/checkout@v2
-      - run: cd e2e && make test
+      - id: run_e2e_test
+        run: cd e2e && make test
+      - if: failure() && steps.run_e2e_test.outcome == 'failure'
+        run: |
+          ./e2e/bin/kubectl -n e2e-test-external describe mysqlclusters.moco.cybozu.com
+          ./e2e/bin/kubectl -n e2e-test-external get statefulsets
+          ./e2e/bin/kubectl -n e2e-test-external get pods
+          ./e2e/bin/kubectl -n moco-system logs -lcontrol-plane=moco-controller-manager --tail=-1
+          ./e2e/bin/kubectl -n e2e-test-external logs -lapp.kubernetes.io/name=moco-mysql -c agent --tail=-1
+          top -b -n 1
+          df -h
diff --git a/controllers/mysql_clustering.go b/controllers/mysql_clustering.go
index f744b297e..786b888fc 100644
--- a/controllers/mysql_clustering.go
+++ b/controllers/mysql_clustering.go
@@ -166,7 +166,14 @@ func decideNextOperation(log logr.Logger, cluster *mocov1alpha1.MySQLCluster, st
 
 	op = restoreEmptyInstance(status, cluster)
 	if len(op) != 0 {
+		var wait bool
+		for _, o := range op {
+			if o.Name() == ops.OperatorClone {
+				wait = true
+			}
+		}
 		return &Operation{
+			Wait:      wait,
 			Operators: op,
 			Phase:     moco.PhaseRestoreInstance,
 			Event:     &moco.EventRestoringReplicaInstances,
diff --git a/controllers/mysql_clustering_test.go b/controllers/mysql_clustering_test.go
index 98ca13690..f19e4dce9 100644
--- a/controllers/mysql_clustering_test.go
+++ b/controllers/mysql_clustering_test.go
@@ -159,7 +159,7 @@ func TestDecideNextOperation(t *testing.T) {
 				),
 			},
 			want: &Operation{
-				Wait:      false,
+				Wait:      true,
 				Operators: []ops.Operator{ops.SetCloneDonorListOp([]int{1}, hostName(0)+":"+strconv.Itoa(moco.MySQLAdminPort)), ops.CloneOp(1, false)},
 				Phase:     moco.PhaseRestoreInstance,
 				Event:     &moco.EventRestoringReplicaInstances,
@@ -176,7 +176,7 @@ func TestDecideNextOperation(t *testing.T) {
 				),
 			},
 			want: &Operation{
-				Wait:      false,
+				Wait:      true,
 				Operators: []ops.Operator{ops.SetCloneDonorListOp([]int{1}, hostName(0)+":"+strconv.Itoa(moco.MySQLAdminPort)), ops.CloneOp(1, false)},
 				Phase:     moco.PhaseRestoreInstance,
 				Event:     &moco.EventRestoringReplicaInstances,
diff --git a/controllers/mysqlcluster_controller.go b/controllers/mysqlcluster_controller.go
index 1b4fe0e50..1234ef4f0 100644
--- a/controllers/mysqlcluster_controller.go
+++ b/controllers/mysqlcluster_controller.go
@@ -138,15 +138,17 @@ func (r *MySQLClusterReconciler) Reconcile(req ctrl.Request) (ctrl.Result, error
 			return ctrl.Result{}, err
 		}
 
+		r.Recorder.Event(cluster, moco.EventInitializationSucceeded.Type, moco.EventInitializationSucceeded.Reason, moco.EventInitializationSucceeded.Message)
+
 		return ctrl.Result{Requeue: true}, nil
 	}
 
 	metrics.UpdateTotalReplicasMetrics(cluster.Name, cluster.Spec.Replicas)
 	// clustering
 	result, err := r.reconcileClustering(ctx, log, cluster)
 	if err != nil {
-		log.Error(err, "failed to ready MySQLCluster")
-		return ctrl.Result{}, err
+		log.Info("failed to ready MySQLCluster", "err", err)
+		return ctrl.Result{RequeueAfter: 5 * time.Second}, nil
 	}
 
 	return result, nil
diff --git a/e2e/bootstrap_test.go b/e2e/bootstrap_test.go
index 9a52d889c..d3388c2b6 100644
--- a/e2e/bootstrap_test.go
+++ b/e2e/bootstrap_test.go
@@ -154,8 +154,7 @@ func testBootstrap() {
 `)
 		Expect(err).ShouldNot(HaveOccurred())
 
-		count := 100000
-		err = insertData(primaryDB, count)
+		err = insertData(primaryDB, lineCount)
 		Expect(err).ShouldNot(HaveOccurred())
 
 		Eventually(func() error {
@@ -170,8 +169,8 @@ func testBootstrap() {
 					return err
 				}
 			}
-			if replicatedCount != count {
-				return fmt.Errorf("repcalited: %d", replicatedCount)
+			if replicatedCount != lineCount {
+				return fmt.Errorf("replicated: %d", replicatedCount)
 			}
 			return nil
 		}).Should(Succeed())
diff --git a/e2e/intermediate_primary_test.go b/e2e/intermediate_primary_test.go
index 9a0e16a89..9b963e24e 100644
--- a/e2e/intermediate_primary_test.go
+++ b/e2e/intermediate_primary_test.go
@@ -80,7 +80,6 @@ stringData:
 		Expect(err).ShouldNot(HaveOccurred())
 		defer connector.stopPortForward()
 
-		count := 100000
 		replica, err := minIndexReplica(cluster)
 		Expect(err).ShouldNot(HaveOccurred())
 		var replicaDB *sqlx.DB
@@ -104,8 +103,8 @@ stringData:
 					return err
 				}
 			}
-			if replicatedCount != count {
-				return fmt.Errorf("repcalited: %d", replicatedCount)
+			if replicatedCount != lineCount {
+				return fmt.Errorf("replicated: %d", replicatedCount)
 			}
 			return nil
 		}).Should(Succeed())
diff --git a/e2e/kubectl_moco_test.go b/e2e/kubectl_moco_test.go
index f090a8a09..b52146563 100644
--- a/e2e/kubectl_moco_test.go
+++ b/e2e/kubectl_moco_test.go
@@ -2,6 +2,7 @@ package e2e
 
 import (
 	"fmt"
+	"strconv"
 	"strings"
 
 	"github.com/cybozu-go/moco"
@@ -27,7 +28,7 @@ func testKubectlMoco() {
 		stdout, stderr, err := execAtLocal("./bin/kubectl-moco", []byte("select count(*) from moco_e2e.replication_test"),
 			"-n", cluster.Namespace, "mysql", "-u", "moco-readonly", "-i", cluster.Name)
 		Expect(err).ShouldNot(HaveOccurred(), "stdout=%s, stderr=%s", stdout, stderr)
-		Expect(string(stdout)).Should(ContainSubstring("100000"))
+		Expect(string(stdout)).Should(ContainSubstring(strconv.Itoa(lineCount)))
 	})
 
 	It("should fetch credential for root", func() {
diff --git a/e2e/primary_failover_test.go b/e2e/primary_failover_test.go
index 74ede5514..e73f56cab 100644
--- a/e2e/primary_failover_test.go
+++ b/e2e/primary_failover_test.go
@@ -50,18 +50,30 @@ func testPrimaryFailOver() {
 
 		By("checking cluster status")
 		Eventually(func() error {
+			/*
+			 * The condition may not become unhealthy immediately after the pod is deleted.
+			 * So the following `findCondition` may observe a healthy condition before the primary is switched.
+			 * On the other hand, it is not guaranteed to observe the unhealthy condition.
+			 *
+			 * Consequently, the `Eventually` block must contain both the condition check and the primary index check.
+			 */
 			cluster, err = getMySQLCluster()
 			healthy := findCondition(cluster.Status.Conditions, v1alpha1.ConditionHealthy)
 			if healthy == nil || healthy.Status != corev1.ConditionTrue {
 				return errors.New("should recover")
 			}
+
+			if cluster.Status.CurrentPrimaryIndex == nil {
+				return errors.New("current primary index is unknown")
+			}
+			newPrimary := *cluster.Status.CurrentPrimaryIndex
+			if newPrimary == firstPrimary {
+				return fmt.Errorf("current primary is still %d", firstPrimary)
+			}
+
 			return nil
 		}, 2*time.Minute).Should(Succeed())
 
-		Expect(cluster.Status.CurrentPrimaryIndex).ShouldNot(BeNil())
-		newPrimary := *cluster.Status.CurrentPrimaryIndex
-		Expect(newPrimary).ShouldNot(Equal(firstPrimary))
-
 		By("connecting to recovered instance")
 		connector.stopPortForward()
 		err = connector.startPortForward()
@@ -106,7 +118,7 @@ func testPrimaryFailOver() {
 				}
 			}
 			if count != primaryCount {
-				return fmt.Errorf("repcalited: %d", count)
+				return fmt.Errorf("replicated: %d", count)
 			}
 			return nil
 		}).Should(Succeed())
diff --git a/e2e/replica_failover_test.go b/e2e/replica_failover_test.go
index 64ef3ad8c..17c9ba69b 100644
--- a/e2e/replica_failover_test.go
+++ b/e2e/replica_failover_test.go
@@ -132,7 +132,7 @@ func testReplicaFailOver() {
 				}
 			}
 			if replicatedCount != primaryCount {
-				return fmt.Errorf("repcalited: %d", replicatedCount)
+				return fmt.Errorf("replicated: %d", replicatedCount)
 			}
 			return nil
 		}).Should(Succeed())
diff --git a/e2e/run_test.go b/e2e/run_test.go
index 10a414738..4190ef1c0 100644
--- a/e2e/run_test.go
+++ b/e2e/run_test.go
@@ -34,7 +34,6 @@ func kubectl(args ...string) ([]byte, []byte, error) {
 	return execAtLocal("./bin/kubectl", nil, args...)
 }
 
-//lint:ignore U1000 This func may be used in the future.
 func kubectlWithInput(input []byte, args ...string) ([]byte, []byte, error) {
 	return execAtLocal("./bin/kubectl", input, args...)
 }
diff --git a/e2e/suite_test.go b/e2e/suite_test.go
index 4a35be690..4cffb7258 100644
--- a/e2e/suite_test.go
+++ b/e2e/suite_test.go
@@ -9,6 +9,10 @@ import (
 	. "github.com/onsi/gomega"
 )
 
+const (
+	lineCount = 10000
+)
+
 func TestE2E(t *testing.T) {
 	if os.Getenv("E2ETEST") == "" {
 		t.Skip("Run under e2e/")