diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml index 14c35645..8c97d7c6 100644 --- a/.github/workflows/release.yaml +++ b/.github/workflows/release.yaml @@ -4,6 +4,8 @@ on: push: tags: - '**' + branches: + - '**' env: REGISTRY: ghcr.io @@ -42,12 +44,13 @@ jobs: # set latest tag for default branch type=raw,value=latest,enable={{is_default_branch}} type=ref,event=tag + type=ref,event=branch - name: Build and push Docker image uses: docker/build-push-action@v3 with: context: . - platforms: linux/amd64 + platforms: linux/amd64,linux/arm64 file: Dockerfile push: true tags: ${{ steps.meta.outputs.tags }} diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 801d0767..97078ee0 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -7,6 +7,10 @@ Run tests via: make test ``` +# Architecture + +For a high-level overview of the architecture, see [docs/architecture.md](./docs/architecture.md). + # Release Process Prereq: Write access to the repo. diff --git a/Dockerfile b/Dockerfile index b8ca27d8..f03e29bc 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,5 +1,15 @@ -# Build the manager binary -FROM golang:1.20 as builder +FROM --platform=$BUILDPLATFORM golang:1.20-alpine AS builder + +RUN apk add --update --no-cache gcc libc-dev + +ARG TARGETARCH +ARG BUILDARCH + +RUN if [ "${TARGETARCH}" = "arm64" ] && [ "${BUILDARCH}" != "arm64" ]; then \ + wget -c https://musl.cc/aarch64-linux-musl-cross.tgz -O - | tar -xzvv --strip-components 1 -C /usr; \ + elif [ "${TARGETARCH}" = "amd64" ] && [ "${BUILDARCH}" != "amd64" ]; then \ + wget -c https://musl.cc/x86_64-linux-musl-cross.tgz -O - | tar -xzvv --strip-components 1 -C /usr; \ + fi WORKDIR /workspace # Copy the Go Modules manifests @@ -17,12 +27,16 @@ COPY internal/ internal/ ARG VERSION -# Build -RUN CGO_ENABLED=0 GOOS=linux GOARCH=amd64 go build -ldflags "-X github.com/strangelove-ventures/cosmos-operator/internal/version.version=$VERSION" -a -o manager . 
+RUN if [ "${TARGETARCH}" = "arm64" ] && [ "${BUILDARCH}" != "arm64" ]; then \ + export CC=aarch64-linux-musl-gcc CXX=aarch64-linux-musl-g++;\ + elif [ "${TARGETARCH}" = "amd64" ] && [ "${BUILDARCH}" != "amd64" ]; then \ + export CC=x86_64-linux-musl-gcc CXX=x86_64-linux-musl-g++; \ + fi; \ + export GOOS=linux GOARCH=$TARGETARCH CGO_ENABLED=1 LDFLAGS='-linkmode external -extldflags "-static"'; \ + go build -ldflags "-X github.com/strangelove-ventures/cosmos-operator/internal/version.version=$VERSION $LDFLAGS" -a -o manager . -# Use distroless as minimal base image to package the manager binary -# Refer to https://github.com/GoogleContainerTools/distroless for more details -FROM gcr.io/distroless/static:nonroot +# Build final image from scratch +FROM scratch LABEL org.opencontainers.image.source=https://github.com/strangelove-ventures/cosmos-operator diff --git a/api/v1/cosmosfullnode_types.go b/api/v1/cosmosfullnode_types.go index 0294a2d6..9c04a0ff 100644 --- a/api/v1/cosmosfullnode_types.go +++ b/api/v1/cosmosfullnode_types.go @@ -445,6 +445,27 @@ type ChainSpec struct { // +optional LogFormat *string `json:"logFormat"` + // URL to address book file to download from the internet. + // The operator detects and properly handles the following file extensions: + // .json, .json.gz, .tar, .tar.gz, .tar.gzip, .zip + // Use AddrbookScript if the chain has an unconventional file format or address book location. + // +optional + AddrbookURL *string `json:"addrbookURL"` + + // Specify shell (sh) script commands to properly download and save the address book file. + // Prefer AddrbookURL if the file is in a conventional format. + // The available shell commands are from docker image ghcr.io/strangelove-ventures/infra-toolkit, including wget and curl. + // Save the file to env var $ADDRBOOK_FILE. + // E.g. curl https://url-to-addrbook.com > $ADDRBOOK_FILE + // Takes precedence over AddrbookURL. + // Hint: Use "set -eux" in your script. 
+ // Available env vars: + // $HOME: The home directory. + // $ADDRBOOK_FILE: The location of the final address book file. + // $CONFIG_DIR: The location of the config dir that houses the address book file. Used for extracting from archives. The archive must have a single file called "addrbook.json". + // +optional + AddrbookScript *string `json:"addrbookScript"` + // URL to genesis file to download from the internet. // Although this field is optional, you will almost always want to set it. // If not set, uses the genesis file created from the init subcommand. (This behavior may be desirable for new chains or testing.) diff --git a/api/v1/zz_generated.deepcopy.go b/api/v1/zz_generated.deepcopy.go index a3c13d7f..4a0a7676 100644 --- a/api/v1/zz_generated.deepcopy.go +++ b/api/v1/zz_generated.deepcopy.go @@ -64,6 +64,16 @@ func (in *ChainSpec) DeepCopyInto(out *ChainSpec) { *out = new(string) **out = **in } + if in.AddrbookURL != nil { + in, out := &in.AddrbookURL, &out.AddrbookURL + *out = new(string) + **out = **in + } + if in.AddrbookScript != nil { + in, out := &in.AddrbookScript, &out.AddrbookScript + *out = new(string) + **out = **in + } if in.GenesisURL != nil { in, out := &in.GenesisURL, &out.GenesisURL *out = new(string) diff --git a/api/v1alpha1/scheduledvolumesnapshot_types.go b/api/v1alpha1/scheduledvolumesnapshot_types.go index 2d7e0669..333662de 100644 --- a/api/v1alpha1/scheduledvolumesnapshot_types.go +++ b/api/v1alpha1/scheduledvolumesnapshot_types.go @@ -99,6 +99,11 @@ type LocalFullNodeRef struct { // DEPRECATED: CosmosFullNode must be in the same namespace as the ScheduledVolumeSnapshot. This field is ignored. // +optional Namespace string `json:"namespace"` + + // Index of the pod to snapshot. If not provided, will do any pod in the CosmosFullNode. + // Useful when snapshots are local to the same node as the pod, requiring snapshots across multiple pods/nodes. 
+ // +optional + Ordinal *int32 `json:"ordinal"` } // ScheduledVolumeSnapshotStatus defines the observed state of ScheduledVolumeSnapshot diff --git a/api/v1alpha1/zz_generated.deepcopy.go b/api/v1alpha1/zz_generated.deepcopy.go index f9e5727e..a0b48773 100644 --- a/api/v1alpha1/zz_generated.deepcopy.go +++ b/api/v1alpha1/zz_generated.deepcopy.go @@ -61,6 +61,11 @@ func (in *JobTemplateSpec) DeepCopy() *JobTemplateSpec { // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *LocalFullNodeRef) DeepCopyInto(out *LocalFullNodeRef) { *out = *in + if in.Ordinal != nil { + in, out := &in.Ordinal, &out.Ordinal + *out = new(int32) + **out = **in + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new LocalFullNodeRef. @@ -78,7 +83,7 @@ func (in *ScheduledVolumeSnapshot) DeepCopyInto(out *ScheduledVolumeSnapshot) { *out = *in out.TypeMeta = in.TypeMeta in.ObjectMeta.DeepCopyInto(&out.ObjectMeta) - out.Spec = in.Spec + in.Spec.DeepCopyInto(&out.Spec) in.Status.DeepCopyInto(&out.Status) } @@ -135,7 +140,7 @@ func (in *ScheduledVolumeSnapshotList) DeepCopyObject() runtime.Object { // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *ScheduledVolumeSnapshotSpec) DeepCopyInto(out *ScheduledVolumeSnapshotSpec) { *out = *in - out.FullNodeRef = in.FullNodeRef + in.FullNodeRef.DeepCopyInto(&out.FullNodeRef) } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ScheduledVolumeSnapshotSpec. diff --git a/config/crd/bases/cosmos.strange.love_cosmosfullnodes.yaml b/config/crd/bases/cosmos.strange.love_cosmosfullnodes.yaml index f0b78e5b..ce869fa6 100644 --- a/config/crd/bases/cosmos.strange.love_cosmosfullnodes.yaml +++ b/config/crd/bases/cosmos.strange.love_cosmosfullnodes.yaml @@ -45,6 +45,27 @@ spec: chain: description: Blockchain-specific configuration. 
properties: + addrbookScript: + description: 'Specify shell (sh) script commands to properly download + and save the address book file. Prefer AddrbookURL if the file + is in a conventional format. The available shell commands are + from docker image ghcr.io/strangelove-ventures/infra-toolkit, + including wget and curl. Save the file to env var $ADDRBOOK_FILE. + E.g. curl https://url-to-addrbook.com > $ADDRBOOK_FILE Takes + precedence over AddrbookURL. Hint: Use "set -eux" in your script. + Available env vars: $HOME: The home directory. $ADDRBOOK_FILE: + The location of the final address book file. $CONFIG_DIR: The + location of the config dir that houses the address book file. + Used for extracting from archives. The archive must have a single + file called "addrbook.json".' + type: string + addrbookURL: + description: 'URL to address book file to download from the internet. + The operator detects and properly handles the following file + extensions: .json, .json.gz, .tar, .tar.gz, .tar.gzip, .zip + Use AddrbookScript if the chain has an unconventional file format + or address book location.' + type: string app: description: App configuration applied to app.toml. properties: diff --git a/config/crd/bases/cosmos.strange.love_scheduledvolumesnapshots.yaml b/config/crd/bases/cosmos.strange.love_scheduledvolumesnapshots.yaml index 1a0cb1b5..7f2f4235 100644 --- a/config/crd/bases/cosmos.strange.love_scheduledvolumesnapshots.yaml +++ b/config/crd/bases/cosmos.strange.love_scheduledvolumesnapshots.yaml @@ -74,6 +74,13 @@ spec: description: 'DEPRECATED: CosmosFullNode must be in the same namespace as the ScheduledVolumeSnapshot. This field is ignored.' type: string + ordinal: + description: Index of the pod to snapshot. If not provided, will + do any pod in the CosmosFullNode. Useful when snapshots are + local to the same node as the pod, requiring snapshots across + multiple pods/nodes. 
+ format: int32 + type: integer required: - name type: object diff --git a/controllers/suite_test.go b/controllers/suite_test.go deleted file mode 100644 index 3e9d77eb..00000000 --- a/controllers/suite_test.go +++ /dev/null @@ -1,64 +0,0 @@ -/* -Copyright 2022 Strangelove Ventures LLC. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -package controllers - -import ( - "path/filepath" - "testing" - - "github.com/stretchr/testify/require" - "k8s.io/client-go/kubernetes/scheme" - "sigs.k8s.io/controller-runtime/pkg/client" - "sigs.k8s.io/controller-runtime/pkg/envtest" - - cosmosv1 "github.com/strangelove-ventures/cosmos-operator/api/v1" - cosmosv1alpha1 "github.com/strangelove-ventures/cosmos-operator/api/v1alpha1" - //+kubebuilder:scaffold:imports -) - -func TestAPIs(t *testing.T) { - t.Skip("TODO: Implement test. Always skipping because of dependency issues.") - // unable to start control plane itself: failed to start the controlplane. 
retried 5 times: fork/exec /usr/local/kubebuilder/bin/etcd: no such file or directory - - testEnv := &envtest.Environment{ - CRDDirectoryPaths: []string{filepath.Join("..", "config", "crd", "bases")}, - ErrorIfCRDPathMissing: true, - } - - t.Cleanup(func() { - if err := testEnv.Stop(); err != nil { - t.Errorf("failed to stop test environment: %v", err) - } - }) - - cfg, err := testEnv.Start() - - require.NoError(t, err) - require.NotNil(t, cfg) - - err = cosmosv1.AddToScheme(scheme.Scheme) - require.NoError(t, err) - - err = cosmosv1alpha1.AddToScheme(scheme.Scheme) - require.NoError(t, err) - - //+kubebuilder:scaffold:scheme - - k8sClient, err := client.New(cfg, client.Options{Scheme: scheme.Scheme}) - require.NoError(t, err) - require.NotNil(t, k8sClient) -} diff --git a/docs/architecture.md b/docs/architecture.md new file mode 100644 index 00000000..4e4f0958 --- /dev/null +++ b/docs/architecture.md @@ -0,0 +1,163 @@ +# Cosmos Operator Architecture + +This is a high-level overview of the architecture of the Cosmos Operator. It is intended to be a reference for +developers. + +## Overview + +The operator was written with the [kubebuilder](https://github.com/kubernetes-sigs/kubebuilder) framework. + +Kubebuilder simplifies and provides abstractions for creating a Kubernetes controller. + +In a nutshell, an operator observes +a [CRD](https://kubernetes.io/docs/concepts/extend-kubernetes/api-extension/custom-resources/). Its job is to match +cluster state with the desired state in the CRD. It +continually watches for changes and updates the cluster accordingly - a "control loop" pattern. + +Each controller implements a Reconcile method: + +```go +Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) +``` + +Unlike "built-in" controllers like Deployments or StatefulSets, operator controllers are visible in the cluster - one pod +backed by a Deployment under the cosmos-operator-system namespace. 
+ +A controller can watch resources outside of the CRD it manages. For example, CosmosFullNode watches for pod deletions, +so it can spin up new pods if a user deletes one manually. + +The watching of resources is in this method for each controller: + +```go +SetupWithManager(ctx context.Context, mgr ctrl.Manager) error +``` + +Refer to kubebuilder docs for more info. + +### Makefile + +Kubebuilder generated much of the Makefile. It contains common tasks for developers. + +### `api` directory + +This directory contains the different CRDs. + +You should run `make generate manifests` each time you change CRDs. + +A CI job should fail if you forget to run this command after modifying the api structs. + +### `config` directory + +The config directory contains kustomize files generated by Kubebuilder. +Strangelove uses these files to deploy the operator (instead of a helm chart). +A helm chart is on the road map but presents challenges in keeping the kustomize and helm code in sync. + +### `controllers` directory + +The controllers directory contains every controller. + +This directory is not unit tested. The code in controllers should act like `main()` functions where it's mostly wiring +up of dependencies from `internal`. + +### `internal` directory + +Almost all the business logic lives in `internal`, which also houses the unit and integration tests. + +# CosmosFullNode + +This is the flagship CRD of the Cosmos Operator and contains the most complexity. + +### Builder, Diff, and Control Pattern + +Each resource has its own builder and controller (referred to as "control" in this context). For example, +see `pvc_builder.go` and `pvc_control.go` which only manage PVCs. All builders should have file suffix `_builder.go` +and all control objects `_control.go`. + +The most complex builder is `pod_builder.go`. There may be opportunities to refactor it. + +The "control" pattern was loosely inspired by Kubernetes source code. 
+ +Within the controller's `Reconcile(...)` method, the controller determines the order of operations of the separate +Control objects. + +On process start, each Control is initialized with a Diff and a Builder. + +On each reconcile loop: + +1. The Builder builds the desired resources from the CRD. +2. Control fetches a list of existing resources. +3. Control uses Diff to compute a diff of the existing to the desired. +4. Control makes changes based on what Diff reports. + +The Control tests are *integration tests* where we mock out the Kubernetes API, but not the Builder or Diff. The +tests run quickly (like unit tests) because we do not make any network calls. + +The Diff object (`type Diff[T client.Object] struct`) took several iterations to get right. There is probably little +need to tweak it further. + +The hardest problem with diffing is determining updates. Essentially, Diff looks for a `Revision() string` method on the +resource and sets a revision annotation. The revision is a simple fnv hash. It compares `Revision` to the existing annotation. +If different, we know it's an update. We cannot compare equality of existing resources directly because Kubernetes adds additional +annotations and fields. + +Builders return a `diff.Resource[T]` which Diff can use. Therefore, Control does not need to adapt resources. + +The fnv hash is computed from a resource's JSON representation, which has proven to be stable. + +### Special Note on Updating Status + +There are several controllers that update a +CosmosFullNode's [status subresource](https://book-v1.book.kubebuilder.io/basics/status_subresource): + +* CosmosFullNode +* ScheduledVolumeSnapshot +* SelfHealing + +Each update to the status subresource triggers another reconcile loop. We found multiple controllers updating status +caused race conditions. Updates were not applied or applied incorrectly. +Some controllers read the status to take action, so it's important to preserve the integrity of the status. 
+ +Therefore, you must use the special `SyncUpdate(...)` method from `fullnode.StatusClient`. It ensures updates are +performed serially per CosmosFullNode. + +### Sentries + +Sentries are special because you should not include a readiness probe due to the way Tendermint/Comet remote +signing works. + +The remote signer reaches out to the sentry on the privval port. This is the inverse of what you'd expect: the sentry +reaching out to the remote signer. + +If the sentry does not detect a remote signer connection, it crashes. And the stable way to connect to a pod is through +a Kube Service. So we have a chicken-and-egg problem. The sentry must be "ready" to be added to the Service, but the +remote signer must connect to the sentry through the Service so it doesn't crash. + +Therefore, the CosmosFullNode controller inspects Tendermint/Comet as part of its rolling update strategy - not just +pod readiness state. + +### CacheController + +The CacheController is special in that it does not manage a CRD. + +It periodically polls every pod for its Tendermint/Comet status such as block height. The polling is done in +the background. It's a controller because it needs the reconcile loop to update which pods it needs to poll. + +The CacheController prevents slow reconcile loops. Previously, we queried this status on every reconcile loop. + +When other controllers want Comet status, they always hit the cache controller. + +# Scheduled Volume Snapshot + +Scheduled Volume Snapshot takes periodic backups. + +To preserve data integrity, it will temporarily delete a pod, so it can capture a PVC snapshot without any process +writing to it. + +It uses a finite state machine pattern in the main reconcile loop. + +# StatefulJob + +StatefulJob periodically runs a job on an interval (crontab not supported yet). The purpose is to run a job that +attaches to a PVC created from a VolumeSnapshot. + +It's the least developed of the CRDs. 
diff --git a/go.mod b/go.mod index 570c2b22..301051ba 100644 --- a/go.mod +++ b/go.mod @@ -89,12 +89,12 @@ require ( github.com/spf13/pflag v1.0.5 // indirect github.com/subosito/gotenv v1.4.2 // indirect go.uber.org/multierr v1.11.0 // indirect - golang.org/x/crypto v0.9.0 // indirect - golang.org/x/net v0.10.0 // indirect + golang.org/x/crypto v0.14.0 // indirect + golang.org/x/net v0.17.0 // indirect golang.org/x/oauth2 v0.7.0 // indirect - golang.org/x/sys v0.8.0 // indirect - golang.org/x/term v0.8.0 // indirect - golang.org/x/text v0.9.0 // indirect + golang.org/x/sys v0.13.0 // indirect + golang.org/x/term v0.13.0 // indirect + golang.org/x/text v0.13.0 // indirect golang.org/x/time v0.1.0 // indirect gomodules.xyz/jsonpatch/v2 v2.2.0 // indirect google.golang.org/appengine v1.6.7 // indirect diff --git a/go.sum b/go.sum index 967b7481..c4949028 100644 --- a/go.sum +++ b/go.sum @@ -429,8 +429,8 @@ golang.org/x/crypto v0.0.0-20210421170649-83a5a9bb288b/go.mod h1:T9bdIzuCu7OtxOm golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= golang.org/x/crypto v0.0.0-20211215153901-e495a2d5b3d3/go.mod h1:IxCIyHEi3zRg3s0A5j5BB6A9Jmi73HwBIUl50j+osU4= golang.org/x/crypto v0.0.0-20220722155217-630584e8d5aa/go.mod h1:IxCIyHEi3zRg3s0A5j5BB6A9Jmi73HwBIUl50j+osU4= -golang.org/x/crypto v0.9.0 h1:LF6fAI+IutBocDJ2OT0Q1g8plpYljMZ4+lty+dsqw3g= -golang.org/x/crypto v0.9.0/go.mod h1:yrmDGqONDYtNj3tH8X9dzUun2m2lzPa9ngI6/RUPGR0= +golang.org/x/crypto v0.14.0 h1:wBqGXzWJW6m1XrIKlAH0Hs1JJ7+9KBwnIO8v66Q9cHc= +golang.org/x/crypto v0.14.0/go.mod h1:MVFd36DqK4CsrnJYDkBA3VC4m2GkXAM0PvzMCn4JQf4= golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= golang.org/x/exp v0.0.0-20190306152737-a1d7652674e8/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= golang.org/x/exp v0.0.0-20190510132918-efd6b22b2522/go.mod h1:ZjyILWgesfNpC6sMxTJOJm9Kp84zZh5NQWvqDGG3Qr8= @@ -506,8 +506,8 @@ 
golang.org/x/net v0.0.0-20210805182204-aaa1db679c0d/go.mod h1:9nx3DQGgdP8bBQD5qx golang.org/x/net v0.0.0-20211112202133-69e39bad7dc2/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= golang.org/x/net v0.0.0-20220127200216-cd36cc0744dd/go.mod h1:CfG3xpIq0wQ8r1q4Su4UZFWDARRcnwPjda9FqA0JpMk= golang.org/x/net v0.0.0-20220225172249-27dd8689420f/go.mod h1:CfG3xpIq0wQ8r1q4Su4UZFWDARRcnwPjda9FqA0JpMk= -golang.org/x/net v0.10.0 h1:X2//UzNDwYmtCLn7To6G58Wr6f5ahEAQgKNzv9Y951M= -golang.org/x/net v0.10.0/go.mod h1:0qNGK6F8kojg2nk9dLZ2mShWaEBan6FAoqfSigmmuDg= +golang.org/x/net v0.17.0 h1:pVaXccu2ozPjCXewfr1S7xza/zcXTity9cCdXQYSjIM= +golang.org/x/net v0.17.0/go.mod h1:NxSsAGuq816PNPmqtQdLE42eU2Fs7NoRIZrHJAlaCOE= golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= golang.org/x/oauth2 v0.0.0-20190604053449-0f29369cfe45/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= @@ -583,12 +583,12 @@ golang.org/x/sys v0.0.0-20211007075335-d3039528d8ac/go.mod h1:oPkhp1MJrh7nUepCBc golang.org/x/sys v0.0.0-20211216021012-1d35b9e2eb4e/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220114195835-da31bd327af9/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220908164124-27713097b956/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.8.0 h1:EBmGv8NaZBZTWvrbjNoL6HVt+IVy3QDQpJs7VRIw3tU= -golang.org/x/sys v0.8.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.13.0 h1:Af8nKPmuFypiUBjVoU9V20FiaFXOcuZI21p0ycVYYGE= +golang.org/x/sys v0.13.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= 
-golang.org/x/term v0.8.0 h1:n5xxQn2i3PC0yLAbjTpNT85q/Kgzcr2gIoX9OrJUols= -golang.org/x/term v0.8.0/go.mod h1:xPskH00ivmX89bAKVGSKKtLOWNx2+17Eiy94tnKShWo= +golang.org/x/term v0.13.0 h1:bb+I9cTfFazGW51MZqBVmZy7+JEJMouUHTUSKVQLBek= +golang.org/x/term v0.13.0/go.mod h1:LTmsnFJwVN6bCy1rVCoS+qHT1HhALEFxKncY3WNNh4U= golang.org/x/text v0.0.0-20170915032832-14c0d48ead0c/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.1-0.20180807135948-17ff2d5776d2/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= @@ -598,8 +598,8 @@ golang.org/x/text v0.3.4/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.5/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= -golang.org/x/text v0.9.0 h1:2sjJmO8cDvYveuX97RDLsxlyUxLl+GHoLxBiRdHllBE= -golang.org/x/text v0.9.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8= +golang.org/x/text v0.13.0 h1:ablQoSUd0tRdKxZewP80B+BaqeKJuVhuRxj/dkrun3k= +golang.org/x/text v0.13.0/go.mod h1:TvPlkZtksWOMsz7fbANvkp4WM8x/WCo/om8BMLbz+aE= golang.org/x/time v0.0.0-20181108054448-85acf8d2951c/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/time v0.0.0-20190308202827-9d24e82272b4/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/time v0.0.0-20191024005414-555d28b269f0/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= diff --git a/internal/fullnode/addrbook.go b/internal/fullnode/addrbook.go new file mode 100644 index 00000000..d1f94664 --- /dev/null +++ b/internal/fullnode/addrbook.go @@ -0,0 +1,40 @@ +package fullnode + +import ( + _ "embed" + "fmt" + + cosmosv1 "github.com/strangelove-ventures/cosmos-operator/api/v1" +) + +var ( + //go:embed script/download-addrbook.sh + scriptDownloadAddrbook string +) + +const 
addrbookScriptWrapper = `ls $CONFIG_DIR/addrbook.json 1> /dev/null 2>&1 +ADDRBOOK_EXISTS=$? +if [ $ADDRBOOK_EXISTS -eq 0 ]; then + echo "Address book already exists" + exit 0 +fi +ls -l $CONFIG_DIR/addrbook.json +%s +ls -l $CONFIG_DIR/addrbook.json + +echo "Address book $ADDRBOOK_FILE downloaded" +` + +// DownloadAddrbookCommand returns a proper address book command for use in an init container. +func DownloadAddrbookCommand(cfg cosmosv1.ChainSpec) (string, []string) { + args := []string{"-c"} + switch { + case cfg.AddrbookScript != nil: + args = append(args, fmt.Sprintf(addrbookScriptWrapper, *cfg.AddrbookScript)) + case cfg.AddrbookURL != nil: + args = append(args, fmt.Sprintf(addrbookScriptWrapper, scriptDownloadAddrbook), "-s", *cfg.AddrbookURL) + default: + args = append(args, "echo Using default address book") + } + return "sh", args +} diff --git a/internal/fullnode/addrbook_test.go b/internal/fullnode/addrbook_test.go new file mode 100644 index 00000000..5ca8e783 --- /dev/null +++ b/internal/fullnode/addrbook_test.go @@ -0,0 +1,72 @@ +package fullnode + +import ( + "testing" + + cosmosv1 "github.com/strangelove-ventures/cosmos-operator/api/v1" + "github.com/stretchr/testify/require" +) + +func TestDownloadAddrbookCommand(t *testing.T) { + t.Parallel() + + requireValidScript := func(t *testing.T, script string) { + t.Helper() + require.NotEmpty(t, script) + require.Contains(t, script, `if [ $ADDRBOOK_EXISTS -eq 0 ]`) + } + + t.Run("default", func(t *testing.T) { + var cfg cosmosv1.ChainSpec + + cmd, args := DownloadAddrbookCommand(cfg) + require.Equal(t, "sh", cmd) + + require.Len(t, args, 2) + + require.Equal(t, "-c", args[0]) + + got := args[1] + require.NotContains(t, got, "ADDRBOOK_EXISTS") + require.Contains(t, got, "Using default address book") + }) + + t.Run("download", func(t *testing.T) { + cfg := cosmosv1.ChainSpec{ + AddrbookURL: ptr("https://example.com/addrbook.json"), + } + cmd, args := DownloadAddrbookCommand(cfg) + require.Equal(t, "sh", cmd) 
+ + require.Len(t, args, 4) + + require.Equal(t, "-c", args[0]) + got := args[1] + requireValidScript(t, got) + require.Contains(t, got, `ADDRBOOK_URL`) + require.Contains(t, got, "download_json") + + require.Equal(t, "-s", args[2]) + require.Equal(t, "https://example.com/addrbook.json", args[3]) + }) + + t.Run("custom", func(t *testing.T) { + cfg := cosmosv1.ChainSpec{ + // Keeping this to assert that custom script takes precedence. + AddrbookURL: ptr("https://example.com/addrbook.json"), + AddrbookScript: ptr("echo hi"), + } + cmd, args := DownloadAddrbookCommand(cfg) + require.Equal(t, "sh", cmd) + + require.Len(t, args, 2) + + require.Equal(t, "-c", args[0]) + + got := args[1] + requireValidScript(t, got) + + require.NotContains(t, got, "ADDRBOOK_URL") + require.Contains(t, got, "echo hi") + }) +} diff --git a/internal/fullnode/pod_builder.go b/internal/fullnode/pod_builder.go index 8e94e47f..fba3dfe3 100644 --- a/internal/fullnode/pod_builder.go +++ b/internal/fullnode/pod_builder.go @@ -278,6 +278,7 @@ func envVars(crd *cosmosv1.CosmosFullNode) []corev1.EnvVar { {Name: "HOME", Value: workDir}, {Name: "CHAIN_HOME", Value: home}, {Name: "GENESIS_FILE", Value: path.Join(home, "config", "genesis.json")}, + {Name: "ADDRBOOK_FILE", Value: path.Join(home, "config", "addrbook.json")}, {Name: "CONFIG_DIR", Value: path.Join(home, "config")}, {Name: "DATA_DIR", Value: path.Join(home, "data")}, } @@ -287,6 +288,7 @@ func initContainers(crd *cosmosv1.CosmosFullNode, moniker string) []corev1.Conta tpl := crd.Spec.PodTemplate binary := crd.Spec.ChainSpec.Binary genesisCmd, genesisArgs := DownloadGenesisCommand(crd.Spec.ChainSpec) + addrbookCmd, addrbookArgs := DownloadAddrbookCommand(crd.Spec.ChainSpec) env := envVars(crd) initCmd := fmt.Sprintf("%s init %s --chain-id %s", binary, moniker, crd.Spec.ChainSpec.ChainID) @@ -332,7 +334,15 @@ echo "Initializing into tmp dir for downstream processing..." 
ImagePullPolicy: tpl.ImagePullPolicy, WorkingDir: workDir, }, - + { + Name: "addrbook-init", + Image: infraToolImage, + Command: []string{addrbookCmd}, + Args: addrbookArgs, + Env: env, + ImagePullPolicy: tpl.ImagePullPolicy, + WorkingDir: workDir, + }, { Name: "config-merge", Image: infraToolImage, diff --git a/internal/fullnode/pod_builder_test.go b/internal/fullnode/pod_builder_test.go index 9558f090..3ba1b8a3 100644 --- a/internal/fullnode/pod_builder_test.go +++ b/internal/fullnode/pod_builder_test.go @@ -222,10 +222,12 @@ func TestPodBuilder(t *testing.T) { require.Equal(t, startContainer.Env[1].Value, "/home/operator/cosmos") require.Equal(t, startContainer.Env[2].Name, "GENESIS_FILE") require.Equal(t, startContainer.Env[2].Value, "/home/operator/cosmos/config/genesis.json") - require.Equal(t, startContainer.Env[3].Name, "CONFIG_DIR") - require.Equal(t, startContainer.Env[3].Value, "/home/operator/cosmos/config") - require.Equal(t, startContainer.Env[4].Name, "DATA_DIR") - require.Equal(t, startContainer.Env[4].Value, "/home/operator/cosmos/data") + require.Equal(t, startContainer.Env[3].Name, "ADDRBOOK_FILE") + require.Equal(t, startContainer.Env[3].Value, "/home/operator/cosmos/config/addrbook.json") + require.Equal(t, startContainer.Env[4].Name, "CONFIG_DIR") + require.Equal(t, startContainer.Env[4].Value, "/home/operator/cosmos/config") + require.Equal(t, startContainer.Env[5].Name, "DATA_DIR") + require.Equal(t, startContainer.Env[5].Value, "/home/operator/cosmos/data") require.Equal(t, envVars(&crd), startContainer.Env) healthContainer := pod.Spec.Containers[1] @@ -242,7 +244,7 @@ func TestPodBuilder(t *testing.T) { } require.Equal(t, healthPort, healthContainer.Ports[0]) - require.Len(t, lo.Map(pod.Spec.InitContainers, func(c corev1.Container, _ int) string { return c.Name }), 5) + require.Len(t, lo.Map(pod.Spec.InitContainers, func(c corev1.Container, _ int) string { return c.Name }), 6) wantInitImages := []string{ 
"ghcr.io/strangelove-ventures/infra-toolkit:v0.0.1", @@ -250,6 +252,7 @@ func TestPodBuilder(t *testing.T) { "ghcr.io/strangelove-ventures/infra-toolkit:v0.0.1", "ghcr.io/strangelove-ventures/infra-toolkit:v0.0.1", "ghcr.io/strangelove-ventures/infra-toolkit:v0.0.1", + "ghcr.io/strangelove-ventures/infra-toolkit:v0.0.1", } require.Equal(t, wantInitImages, lo.Map(pod.Spec.InitContainers, func(c corev1.Container, _ int) string { return c.Image @@ -267,7 +270,11 @@ func TestPodBuilder(t *testing.T) { require.Contains(t, initCont.Args[1], `osmosisd init osmosis-6 --chain-id osmosis-123 --home "$CHAIN_HOME"`) require.Contains(t, initCont.Args[1], `osmosisd init osmosis-6 --chain-id osmosis-123 --home "$HOME/.tmp"`) - mergeConfig := pod.Spec.InitContainers[3] + mergeConfig1 := pod.Spec.InitContainers[3] + // The order of config-merge arguments is important. Rightmost takes precedence. + require.Contains(t, mergeConfig1.Args[1], `echo Using default address book`) + + mergeConfig := pod.Spec.InitContainers[4] // The order of config-merge arguments is important. Rightmost takes precedence. 
require.Contains(t, mergeConfig.Args[1], `config-merge -f toml "$TMP_DIR/config.toml" "$OVERLAY_DIR/config-overlay.toml" > "$CONFIG_DIR/config.toml"`) require.Contains(t, mergeConfig.Args[1], `config-merge -f toml "$TMP_DIR/app.toml" "$OVERLAY_DIR/app-overlay.toml" > "$CONFIG_DIR/app.toml`) @@ -293,10 +300,12 @@ func TestPodBuilder(t *testing.T) { require.Equal(t, container.Env[1].Value, "/home/operator/.osmosisd") require.Equal(t, container.Env[2].Name, "GENESIS_FILE") require.Equal(t, container.Env[2].Value, "/home/operator/.osmosisd/config/genesis.json") - require.Equal(t, container.Env[3].Name, "CONFIG_DIR") - require.Equal(t, container.Env[3].Value, "/home/operator/.osmosisd/config") - require.Equal(t, container.Env[4].Name, "DATA_DIR") - require.Equal(t, container.Env[4].Value, "/home/operator/.osmosisd/data") + require.Equal(t, container.Env[3].Name, "ADDRBOOK_FILE") + require.Equal(t, container.Env[3].Value, "/home/operator/.osmosisd/config/addrbook.json") + require.Equal(t, container.Env[4].Name, "CONFIG_DIR") + require.Equal(t, container.Env[4].Value, "/home/operator/.osmosisd/config") + require.Equal(t, container.Env[5].Name, "DATA_DIR") + require.Equal(t, container.Env[5].Value, "/home/operator/.osmosisd/data") require.NotEmpty(t, pod.Spec.InitContainers) @@ -554,7 +563,7 @@ gaiad start --home /home/operator/cosmos` require.Equal(t, "/foo", extraVol[0].MountPath) initConts := lo.SliceToMap(pod.Spec.InitContainers, func(c corev1.Container) (string, corev1.Container) { return c.Name, c }) - require.ElementsMatch(t, []string{"clean-init", "chain-init", "new-init", "genesis-init", "config-merge"}, lo.Keys(initConts)) + require.ElementsMatch(t, []string{"clean-init", "chain-init", "new-init", "genesis-init", "addrbook-init", "config-merge"}, lo.Keys(initConts)) require.Equal(t, "foo:latest", initConts["chain-init"].Image) }) diff --git a/internal/fullnode/pvc_builder.go b/internal/fullnode/pvc_builder.go index 5720265a..12f113c0 100644 --- 
a/internal/fullnode/pvc_builder.go +++ b/internal/fullnode/pvc_builder.go @@ -6,10 +6,16 @@ import ( cosmosv1 "github.com/strangelove-ventures/cosmos-operator/api/v1" "github.com/strangelove-ventures/cosmos-operator/internal/diff" "github.com/strangelove-ventures/cosmos-operator/internal/kube" + "gopkg.in/inf.v0" corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" ) +const ( + snapshotGrowthFactor = 102 // unscaled value passed to inf.NewDec with scale 2, i.e. a 1.02 multiplier (2% padding) +) + var ( defaultAccessModes = []corev1.PersistentVolumeAccessMode{corev1.ReadWriteOnce} ) @@ -79,9 +85,13 @@ func pvcResources(crd *cosmosv1.CosmosFullNode) corev1.ResourceRequirements { reqs = crd.Spec.VolumeClaimTemplate.Resources size = reqs.Requests[corev1.ResourceStorage] ) + if autoScale := crd.Status.SelfHealing.PVCAutoScale; autoScale != nil { - if autoScale.RequestedSize.Cmp(size) > 0 { - reqs.Requests[corev1.ResourceStorage] = autoScale.RequestedSize + requestedSize := autoScale.RequestedSize.DeepCopy() // work on a copy so the decimal math below operates on the copy, not the status value + newSize := requestedSize.AsDec() + sizeWithPadding := resource.NewDecimalQuantity(*newSize.Mul(newSize, inf.NewDec(snapshotGrowthFactor, 2)), resource.DecimalSI) + if sizeWithPadding.Cmp(size) > 0 { + reqs.Requests[corev1.ResourceStorage] = *sizeWithPadding // NOTE(review): reqs.Requests looks like the same map as the CRD spec's Requests, so this write would be visible through crd - confirm intended } } return reqs diff --git a/internal/fullnode/pvc_builder_test.go b/internal/fullnode/pvc_builder_test.go index 8cd9acf6..26cdb51e 100644 --- a/internal/fullnode/pvc_builder_test.go +++ b/internal/fullnode/pvc_builder_test.go @@ -28,6 +28,7 @@ func TestBuildPVCs(t *testing.T) { Requests: corev1.ResourceList{corev1.ResourceStorage: resource.MustParse("100G")}, }, } + crd.Spec.InstanceOverrides = map[string]cosmosv1.InstanceOverridesSpec{ "juno-0": {}, } @@ -148,30 +149,81 @@ func TestBuildPVCs(t *testing.T) { } }) - t.Run("pvc auto scale", func(t *testing.T) { - for _, tt := range []struct { - SpecQuant, AutoScaleQuant, WantQuant string - }{ - {"100G", "99G", "100G"}, - {"100G", "101G", "101G"}, - } { - crd := defaultCRD() - 
crd.Spec.Replicas = 1 - crd.Spec.VolumeClaimTemplate = cosmosv1.PersistentVolumeClaimSpec{ - Resources: corev1.ResourceRequirements{ - Requests: corev1.ResourceList{corev1.ResourceStorage: resource.MustParse(tt.SpecQuant)}, - }, - } - crd.Status.SelfHealing.PVCAutoScale = &cosmosv1.PVCAutoScaleStatus{ - RequestedSize: resource.MustParse(tt.AutoScaleQuant), + t.Run("pvc auto scale with padding", func(t *testing.T) { + t.Run("given auto scale size less than current size", func(t *testing.T) { + for _, tt := range []struct { + SpecQuant, AutoScaleQuant, WantQuant string + }{ + {"100G", "97G", "100G"}, + } { + crd := defaultCRD() + crd.Spec.Replicas = 1 + crd.Spec.VolumeClaimTemplate = cosmosv1.PersistentVolumeClaimSpec{ + Resources: corev1.ResourceRequirements{ + Requests: corev1.ResourceList{corev1.ResourceStorage: resource.MustParse(tt.SpecQuant)}, + }, + } + crd.Status.SelfHealing.PVCAutoScale = &cosmosv1.PVCAutoScaleStatus{ + RequestedSize: resource.MustParse(tt.AutoScaleQuant), + } + + pvcs := BuildPVCs(&crd) + require.Len(t, pvcs, 1, tt) + + want := corev1.ResourceList{corev1.ResourceStorage: resource.MustParse(tt.WantQuant)} + require.Equal(t, want.Storage().Value(), pvcs[0].Object().Spec.Resources.Requests.Storage().Value(), tt) } + }) - pvcs := BuildPVCs(&crd) - require.Len(t, pvcs, 1, tt) + t.Run("given auto scale size equal to current size", func(t *testing.T) { + for _, tt := range []struct { + SpecQuant, AutoScaleQuant, WantQuant string + }{ + {"102G", "100G", "102G"}, + } { + crd := defaultCRD() + crd.Spec.Replicas = 1 + crd.Spec.VolumeClaimTemplate = cosmosv1.PersistentVolumeClaimSpec{ + Resources: corev1.ResourceRequirements{ + Requests: corev1.ResourceList{corev1.ResourceStorage: resource.MustParse(tt.SpecQuant)}, + }, + } + crd.Status.SelfHealing.PVCAutoScale = &cosmosv1.PVCAutoScaleStatus{ + RequestedSize: resource.MustParse(tt.AutoScaleQuant), + } + + pvcs := BuildPVCs(&crd) + require.Len(t, pvcs, 1, tt) + + want := 
corev1.ResourceList{corev1.ResourceStorage: resource.MustParse(tt.WantQuant)} + require.Equal(t, want, pvcs[0].Object().Spec.Resources.Requests, tt) + } + }) - want := corev1.ResourceList{corev1.ResourceStorage: resource.MustParse(tt.WantQuant)} - require.Equal(t, want, pvcs[0].Object().Spec.Resources.Requests, tt) - } + t.Run("given auto scale size greater than current size", func(t *testing.T) { + for _, tt := range []struct { + SpecQuant, AutoScaleQuant, WantQuant string + }{ + {"100G", "100G", "102G"}, + } { + crd := defaultCRD() + crd.Spec.Replicas = 1 + crd.Spec.VolumeClaimTemplate = cosmosv1.PersistentVolumeClaimSpec{ + Resources: corev1.ResourceRequirements{ + Requests: corev1.ResourceList{corev1.ResourceStorage: resource.MustParse(tt.SpecQuant)}, + }, + } + crd.Status.SelfHealing.PVCAutoScale = &cosmosv1.PVCAutoScaleStatus{ + RequestedSize: resource.MustParse(tt.AutoScaleQuant), + } + + pvcs := BuildPVCs(&crd) + require.Len(t, pvcs, 1, tt) + + want := corev1.ResourceList{corev1.ResourceStorage: resource.MustParse(tt.WantQuant)} + require.Equal(t, want.Storage().Value(), pvcs[0].Object().Spec.Resources.Requests.Storage().Value(), tt) + } + }) }) test.HasTypeLabel(t, func(crd cosmosv1.CosmosFullNode) []map[string]string { diff --git a/internal/fullnode/script/download-addrbook.sh b/internal/fullnode/script/download-addrbook.sh new file mode 100755 index 00000000..4eaad8d6 --- /dev/null +++ b/internal/fullnode/script/download-addrbook.sh @@ -0,0 +1,53 @@ +set -eu + +# $ADDRBOOK_FILE and $CONFIG_DIR already set via pod env vars. + +ADDRBOOK_URL="$1" + +echo "Downloading address book file $ADDRBOOK_URL to $ADDRBOOK_FILE..." + +download_json() { + echo "Downloading plain json..." + wget -c -O "$ADDRBOOK_FILE" "$ADDRBOOK_URL" +} + +download_jsongz() { + echo "Downloading json.gz..." + wget -c -O - "$ADDRBOOK_URL" | gunzip -c >"$ADDRBOOK_FILE" +} + +download_tar() { + echo "Downloading and extracting tar..." 
+ wget -c -O - "$ADDRBOOK_URL" | tar -x -C "$CONFIG_DIR" +} + +download_targz() { + echo "Downloading and extracting compressed tar..." + wget -c -O - "$ADDRBOOK_URL" | tar -xz -C "$CONFIG_DIR" +} + +download_zip() { + echo "Downloading and extracting zip..." + wget -c -O tmp_addrbook.zip "$ADDRBOOK_URL" + unzip tmp_addrbook.zip + rm tmp_addrbook.zip + mv addrbook.json "$ADDRBOOK_FILE" # archive is expected to contain addrbook.json at its root; set -e aborts if it does not +} + +rm -f "$ADDRBOOK_FILE" + +case "$ADDRBOOK_URL" in +*.json.gz) download_jsongz ;; +*.json) download_json ;; +*.tar.gz) download_targz ;; +*.tar.gzip) download_targz ;; +*.tar) download_tar ;; +*.zip) download_zip ;; +*) + echo "Unable to handle file extension for $ADDRBOOK_URL" + exit 1 + ;; +esac + +echo "Saved address book file to $ADDRBOOK_FILE." +echo "Download address book file complete." diff --git a/internal/volsnapshot/vol_snapshot_control.go b/internal/volsnapshot/vol_snapshot_control.go index 4686a5ca..68b16473 100644 --- a/internal/volsnapshot/vol_snapshot_control.go +++ b/internal/volsnapshot/vol_snapshot_control.go @@ -67,7 +67,23 @@ func (control VolumeSnapshotControl) FindCandidate(ctx context.Context, crd *cos return Candidate{}, fmt.Errorf("%d or more pods must be in-sync to prevent downtime, found %d in-sync", minAvail, availCount) } - pod := synced[0] + var pod *corev1.Pod + + if crd.Spec.FullNodeRef.Ordinal != nil { + podIndex := *crd.Spec.FullNodeRef.Ordinal + podIndexStr := fmt.Sprintf("%d", podIndex) + for _, p := range synced { + if p.Annotations["app.kubernetes.io/ordinal"] == podIndexStr { + pod = p + break + } + } + if pod == nil { + return Candidate{}, fmt.Errorf("in-sync pod with index %d not found", podIndex) + } + } else { + pod = synced[0] + } return Candidate{ PodLabels: pod.Labels, diff --git a/internal/volsnapshot/vol_snapshot_control_test.go b/internal/volsnapshot/vol_snapshot_control_test.go index 1772c5c1..4cb5aa1a 100644 --- a/internal/volsnapshot/vol_snapshot_control_test.go +++ b/internal/volsnapshot/vol_snapshot_control_test.go @@ -123,6 
+123,75 @@ func TestVolumeSnapshotControl_FindCandidate(t *testing.T) { require.Equal(t, candidate.Labels, got.PodLabels) }) + t.Run("happy path with index", func(t *testing.T) { + pods := make([]corev1.Pod, 3) + for i := range pods { + pods[i].Status.Conditions = []corev1.PodCondition{readyCondition} + } + var mClient mockPodClient + mClient.Items = pods + + var fullnodeCRD cosmosv1.CosmosFullNode + fullnodeCRD.Name = fullNodeName + // Purposefully using PodBuilder to cross-test any breaking changes in PodBuilder which affects + // finding the PVC name. + candidate, err := fullnode.NewPodBuilder(&fullnodeCRD).WithOrdinal(1).Build() + require.NoError(t, err) + + candidate.Annotations["app.kubernetes.io/ordinal"] = "1" + + control := NewVolumeSnapshotControl(&mClient, mockPodFilter{ + SyncedPodsFn: func(ctx context.Context, controller client.ObjectKey) []*corev1.Pod { + require.Equal(t, namespace, controller.Namespace) + require.Equal(t, fullNodeName, controller.Name) + return []*corev1.Pod{candidate, new(corev1.Pod), new(corev1.Pod)} + }, + }) + + indexCRD := crd.DeepCopy() + index := int32(1) + indexCRD.Spec.FullNodeRef.Ordinal = &index + + got, err := control.FindCandidate(ctx, indexCRD) + require.NoError(t, err) + + require.Equal(t, "cosmoshub-1", got.PodName) + require.Equal(t, "pvc-cosmoshub-1", got.PVCName) + require.NotEmpty(t, got.PodLabels) + require.Equal(t, candidate.Labels, got.PodLabels) + }) + + t.Run("index not available", func(t *testing.T) { + pods := make([]corev1.Pod, 3) + for i := range pods { + pods[i].Status.Conditions = []corev1.PodCondition{readyCondition} + } + var mClient mockPodClient + mClient.Items = pods + + var fullnodeCRD cosmosv1.CosmosFullNode + fullnodeCRD.Name = fullNodeName + // Purposefully using PodBuilder to cross-test any breaking changes in PodBuilder which affects + // finding the PVC name. 
+ candidate, err := fullnode.NewPodBuilder(&fullnodeCRD).WithOrdinal(1).Build() + require.NoError(t, err) + + control := NewVolumeSnapshotControl(&mClient, mockPodFilter{ + SyncedPodsFn: func(ctx context.Context, controller client.ObjectKey) []*corev1.Pod { + require.Equal(t, namespace, controller.Namespace) + require.Equal(t, fullNodeName, controller.Name) + return []*corev1.Pod{candidate, new(corev1.Pod), new(corev1.Pod)} + }, + }) + + indexCRD := crd.DeepCopy() + index := int32(2) + indexCRD.Spec.FullNodeRef.Ordinal = &index + + _, err = control.FindCandidate(ctx, indexCRD) + require.ErrorContains(t, err, "in-sync pod with index 2 not found") + }) + t.Run("custom min available", func(t *testing.T) { var pod corev1.Pod pod.Name = "found-me" diff --git a/local.Dockerfile b/local.Dockerfile new file mode 100644 index 00000000..0ef37e31 --- /dev/null +++ b/local.Dockerfile @@ -0,0 +1,33 @@ +FROM golang:1.20-alpine AS builder + +RUN apk add --update --no-cache gcc libc-dev + +WORKDIR /workspace +# Copy the Go Modules manifests +COPY go.mod go.mod +COPY go.sum go.sum +# cache deps before building and copying source so that we don't need to re-download as much +# and so that source changes don't invalidate our downloaded layer +RUN go mod download + +# Copy the go source +COPY *.go . +COPY api/ api/ +COPY controllers/ controllers/ +COPY internal/ internal/ + +ARG VERSION + +RUN export CGO_ENABLED=1 LDFLAGS='-linkmode external -extldflags "-static"'; \ + go build -ldflags "-X github.com/strangelove-ventures/cosmos-operator/internal/version.version=$VERSION $LDFLAGS" -a -o manager . + +# Build final image from scratch +FROM scratch + +LABEL org.opencontainers.image.source=https://github.com/strangelove-ventures/cosmos-operator + +WORKDIR / +COPY --from=builder /workspace/manager . +USER 65532:65532 + +ENTRYPOINT ["/manager"]