From 8c956cef00eb27a0da476b8e1cf9caf3009caaef Mon Sep 17 00:00:00 2001 From: Tobi Okedeji Date: Fri, 5 Jul 2024 20:18:58 +0100 Subject: [PATCH] Add prometheus metrics (#136) --- .github/workflows/ecr.yml | 56 +++++++++++++++++++++++++++++++++ cmd/node/execute.go | 9 ++++++ cmd/node/main.go | 65 +++++++++++++++++++++++++++++++++++++++ docker/Dockerfile_worker | 2 +- go.mod | 2 +- 5 files changed, 132 insertions(+), 2 deletions(-) create mode 100644 .github/workflows/ecr.yml diff --git a/.github/workflows/ecr.yml b/.github/workflows/ecr.yml new file mode 100644 index 0000000..c195f37 --- /dev/null +++ b/.github/workflows/ecr.yml @@ -0,0 +1,56 @@ +# This workflow builds the head and worker container images and pushes them to Amazon ECR +# whenever a commit is pushed to the add-prometheus-metrics branch. +name: Build and Push docker image to ECR + +on: + push: + branches: + - add-prometheus-metrics + + +concurrency: + group: ${{ github.ref }} + cancel-in-progress: true +jobs: + build-push: + name: Build and Push docker image + runs-on: ubuntu-latest + + steps: + - name: Checkout + uses: actions/checkout@v3 + + - name: Configure AWS credentials + uses: aws-actions/configure-aws-credentials@v2 + with: + aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }} + aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} + aws-region: us-east-1 + + - name: Login to Amazon ECR + id: login-ecr + uses: aws-actions/amazon-ecr-login@v1 + + - name: Build, tag, and push image to Amazon ECR + id: build-push-image + env: + ECR_REGISTRY: ${{ steps.login-ecr.outputs.registry }} + ECR_REPOSITORY: worker-prometheus-test + run: | + + IMAGE_TAG="test-latest-2" + + # compute-node HEADS + ECR_REPOSITORY_HEAD="${ECR_REPOSITORY}-head" + + docker build --pull -f docker/Dockerfile_head \ + --build-arg "GH_TOKEN=${{ secrets.GH_READONLY_PAT }}" \ + -t $ECR_REGISTRY/$ECR_REPOSITORY_HEAD:$IMAGE_TAG . 
+ docker push $ECR_REGISTRY/$ECR_REPOSITORY_HEAD:$IMAGE_TAG + + # Build a docker container and push it to ECR so that it can be deployed to ECS. + # compute-node workers + docker build --pull -f docker/Dockerfile_worker \ + --build-arg "GH_TOKEN=${{ secrets.GH_READONLY_PAT }}" \ + -t $ECR_REGISTRY/$ECR_REPOSITORY:$IMAGE_TAG . + docker push $ECR_REGISTRY/$ECR_REPOSITORY:$IMAGE_TAG diff --git a/cmd/node/execute.go b/cmd/node/execute.go index d9599d7..7413198 100644 --- a/cmd/node/execute.go +++ b/cmd/node/execute.go @@ -67,8 +67,14 @@ func sendResultsToChain(log zerolog.Logger, appChainClient *AppChain, res node.C } if appChainClient.Config.WorkerMode == WorkerModeWorker { // for inference or forecast appChainClient.SendWorkerModeData(reqCtx, topicId, aggregate.Aggregate(res.Data)) + + // increment the number of commits made by worker + workerChainCommit.Inc() } else { // for losses appChainClient.SendReputerModeData(reqCtx, topicId, aggregate.Aggregate(res.Data)) + + // increment the number of commits made by reputer + reputerChainCommit.Inc() } } @@ -110,6 +116,9 @@ func createExecutor(a api.API) func(ctx echo.Context) error { res.Message = err.Error() } + // increment the head node request counter + headRequests.Inc() + // Send the response. 
return ctx.JSON(http.StatusOK, res) } diff --git a/cmd/node/main.go b/cmd/node/main.go index 4256ac8..cd27365 100644 --- a/cmd/node/main.go +++ b/cmd/node/main.go @@ -33,6 +33,8 @@ import ( "github.com/allora-network/b7s/node" "github.com/allora-network/b7s/peerstore" "github.com/allora-network/b7s/store" + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/client_golang/prometheus/promhttp" ) const ( @@ -41,6 +43,47 @@ const ( notFoundValue = -1 ) +var ( + opsProcessed = prometheus.NewCounter(prometheus.CounterOpts{ + Name: "allora_node_total_operations", + Help: "The total number of processed operations", + }) + + headRequests = prometheus.NewCounter(prometheus.CounterOpts{ + Name: "allora_head_node_total_requests", + Help: "The total number of request made by head node", + }) + + workerResponse = prometheus.NewCounter(prometheus.CounterOpts{ + Name: "allora_worker_node_total_response", + Help: "The total number of responds from worker node", + }) + + reputerResponse = prometheus.NewCounter(prometheus.CounterOpts{ + Name: "allora_reputer_node_total_response", + Help: "The total number of responds from reputer node", + }) + + workerChainCommit = prometheus.NewCounter(prometheus.CounterOpts{ + Name: "allora_worker_node_chain_commit", + Help: "The total number of worker commits to the chain", + }) + + reputerChainCommit = prometheus.NewCounter(prometheus.CounterOpts{ + Name: "allora_reputer_node_chain_commit", + Help: "The total number of reputer commits to the chain", + }) +) + +func init() { + prometheus.MustRegister(opsProcessed) + prometheus.MustRegister(headRequests) + prometheus.MustRegister(workerResponse) + prometheus.MustRegister(reputerResponse) + prometheus.MustRegister(workerChainCommit) + prometheus.MustRegister(reputerChainCommit) +} + func main() { os.Exit(run()) } @@ -217,6 +260,10 @@ func (e *AlloraExecutor) ExecuteFunction(requestID string, req execute.Request) fmt.Println("Error serializing WorkerDataBundle: ", err) return 
result, err } + + // increment the number of responses made by worker + workerResponse.Inc() + outputJson := string(workerDataBundleBytes) fmt.Println("Signed OutputJson sent to consensus: ", outputJson) result.Result.Stdout = outputJson @@ -405,11 +452,18 @@ func (e *AlloraExecutor) ExecuteFunction(requestID string, req execute.Request) fmt.Println("Error serializing WorkerDataBundle: ", err) return result, err } + + // increment the number of responses made by reputer - NOTE(review): this bumps workerResponse; reputerResponse looks intended, confirm + workerResponse.Inc() + outputJson := string(reputerDataResponseBytes) fmt.Println("Signed OutputJson sent to consensus: ", outputJson) result.Result.Stdout = outputJson } } + + // increment the number of operations processed by worker or reputer + opsProcessed.Inc() return result, err } @@ -648,6 +702,17 @@ func run() int { log.Info().Msg("Allora Node stopped") }() + // Start HTTP server for Prometheus metrics. + http.Handle("/metrics", promhttp.Handler()) + go func() { + log.Info().Str("role", role.String()).Msg("Starting metrics server on :2112") + if err := http.ListenAndServe(":2112", nil); err != nil { + log.Error().Err(err).Msg("Could not start metric server") + } + + log.Info().Msg("Metrics server stopped") + }() + // If we're a head node - start the REST API. 
if role == blockless.HeadNode { diff --git a/docker/Dockerfile_worker b/docker/Dockerfile_worker index 8e94579..78d0d02 100644 --- a/docker/Dockerfile_worker +++ b/docker/Dockerfile_worker @@ -80,6 +80,6 @@ RUN groupadd -g 1001 ${USERNAME} \ USER ${USERNAME} -EXPOSE 8080 9527 +EXPOSE 8080 9527 2112 ENTRYPOINT ["allora-node"] diff --git a/go.mod b/go.mod index 78a4815..f44360d 100644 --- a/go.mod +++ b/go.mod @@ -198,7 +198,7 @@ require ( github.com/pkg/errors v0.9.1 // indirect github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect github.com/polydawn/refmt v0.89.0 // indirect - github.com/prometheus/client_golang v1.18.0 // indirect + github.com/prometheus/client_golang v1.18.0 github.com/prometheus/client_model v0.6.0 // indirect github.com/prometheus/common v0.47.0 // indirect github.com/prometheus/procfs v0.12.0 // indirect