
chore(metrics): init Metrics in app init, cleanup setup code to one b… #1167

Open: wants to merge 14 commits into base: master
9 changes: 5 additions & 4 deletions .github/workflows/ci.yaml
@@ -1,8 +1,9 @@
# push will run on every pushed commit to any branch (so this will rerun the tests
# once a branch gets merged to main in addition to any new commits on any branch)
on: push

name: CI
on:
push:
pull_request:
types: [opened, reopened]
Comment on lines +1 to -5

Contributor:
There can be edge cases where the "automatic merge" messes something up, and the CI succeeds on the "branch run", but fails on the "PR run". In that case you have to update your branch manually and fix the conflict that git didn't see. It has happened to me a handful of times, not sure it's worth running the CI twice on every PR, but 🤷‍♀️

Contributor Author:

Does it happen in other repos? Which automatic merge are you referring to?

Contributor:
It can happen in any repo.

I can't find documentation about it, but this is my understanding from what I observed: when two checks run (one for push and one for PR), they're not necessarily running on the same code. The "push" check runs on the branch's code, and the "PR" check runs on the code that would be on master after merging the PR. If there are any changes on master that aren't on the branch, GitHub performs a merge automatically and runs the CI on that. It's the same code that would end up on master if you chose to merge without updating your branch first; git/GitHub finds no conflict and merges. Occasionally it does this wrong, and that can be caught by the "PR" CI check.

Contributor Author:
We should be protected against people merging without first updating their branch, at which point the push should run again. I do remember issues in the past vaguely... but I can't find any documentation in GH Actions that leads me to believe it's necessary to have both of these. Unless you have a strong opinion, I would rather not run everything twice and just get feedback on the code on main when the branch-creator merges it in and pushes those commits.
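For reference, the trade-off debated here has a third option (a hedged sketch under assumed requirements, not what this PR does): scope the push trigger to the default branch, so each PR gets a single CI run on the would-be merge result while master still gets a post-merge run.

```yaml
# Hypothetical alternative trigger config (not this PR's choice):
name: CI
on:
  push:
    branches: [master]   # post-merge runs on the default branch only
  pull_request:          # one run per PR, on the would-be merge result
```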


concurrency:
group: '${{ github.workflow }} @ ${{ github.event.pull_request.head.label || github.head_ref || github.ref }}'
cancel-in-progress: true
4 changes: 2 additions & 2 deletions .secrets.baseline
@@ -121,7 +121,7 @@
"filename": ".github/workflows/ci.yaml",
"hashed_secret": "3e26d6750975d678acb8fa35a0f69237881576b0",
"is_verified": false,
"line_number": 13
"line_number": 14
}
],
"deployment/scripts/postgresql/postgresql_init.sql": [
@@ -422,5 +422,5 @@
}
]
},
"generated_at": "2024-07-25T17:19:58Z"
"generated_at": "2024-07-26T20:11:41Z"
}
9 changes: 9 additions & 0 deletions README.md
@@ -386,7 +386,16 @@ We use JSON Web Tokens (JWTs) as the format for all tokens of the following type
- OIDC access token: this token can be sent to Gen3 services via bearer header and get protected resources.
- OIDC refresh token: this token can be sent to fence to request a new access / id token.

## Unit testing

The easiest way to ensure your environment is set up for tests the same way
as in CI/CD is to use the setup and test run script that the CI/CD uses.

You can run unit tests (which are run with pytest behind the scenes) like this:

```shell
bash ./tests/ci_commands_script.sh
```

### JWT Information

33 changes: 33 additions & 0 deletions deployment/scripts/metrics/setup_prometheus
@@ -0,0 +1,33 @@
#!/usr/bin/env bash
# This script is called by:
# UWSGI during startup (this script gets moved to the /fence directory in the Dockerfile)
# - It prepares the prometheus_multiproc_dir folder to store the metrics from separate uwsgi workers (per PID)
# run.py
# - So local runs set up the necessary environment vars and folders for prometheus metrics
# The test framework in conftest
# - So test runs set up the necessary environment vars and folders for prometheus metrics

# Usage:
# ./setup_prometheus [DIR] [true|false]

# Default directory if no argument is provided
DIR=${1:-/var/tmp/prometheus_metrics}

# Determine whether to wipe the directory (default is to wipe)
SETUP_DIR=${2:-true}

set -ex

if [[ "$SETUP_DIR" == "true" ]]; then
echo "setting up $DIR. clearing existing files, ensuring it exists, chmod 755"
rm -Rf "$DIR"
mkdir -p "$DIR"
chmod 755 "$DIR"
fi

if id -u nginx &>/dev/null; then
chown "$(id -u nginx)":"$(id -g nginx)" "$DIR"
fi

export PROMETHEUS_MULTIPROC_DIR="$DIR"
echo "PROMETHEUS_MULTIPROC_DIR is $PROMETHEUS_MULTIPROC_DIR"
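The same preparation can be sketched in plain Python, mirroring what this PR's `Metrics` class does at init time (the demo path below is an assumption; the script's real default is `/var/tmp/prometheus_metrics`):

```python
import os
import pathlib
import tempfile

# Assumed demo path, stands in for /var/tmp/prometheus_metrics.
prometheus_dir = os.path.join(tempfile.gettempdir(), "prometheus_metrics_demo")

# Ensure the multiprocess metrics directory exists, then point the
# prometheus client at it via the env var it reads.
pathlib.Path(prometheus_dir).mkdir(parents=True, exist_ok=True)
os.environ["PROMETHEUS_MULTIPROC_DIR"] = prometheus_dir
print(os.environ["PROMETHEUS_MULTIPROC_DIR"])
```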
3 changes: 1 addition & 2 deletions deployment/uwsgi/uwsgi.ini
@@ -28,8 +28,7 @@ pythonpath = /usr/local/src/*
# metrics setup
stats = 127.0.0.1:9191
stats-http = true
env = prometheus_multiproc_dir=/var/tmp/uwsgi_flask_metrics
exec-asap = /fence/clear_prometheus_multiproc /var/tmp/uwsgi_flask_metrics
exec-asap = /fence/setup_prometheus /var/tmp/prometheus_metrics

# Initialize application in worker processes, not master. This prevents the
# workers from all trying to open the same database connections at startup.
6 changes: 4 additions & 2 deletions fence/__init__.py
@@ -27,7 +27,7 @@
)

from fence.auth import logout, build_redirect_url
from fence.metrics import metrics
from fence.metrics import Metrics
from fence.blueprints.data.indexd import S3IndexedFileLocation
from fence.blueprints.login.utils import allowed_login_redirects, domain
from fence.errors import UserError
@@ -97,6 +97,8 @@ def app_init(
logger.info(
f"Prometheus metrics are{'' if config['ENABLE_PROMETHEUS_METRICS'] else ' NOT'} enabled."
)
# Initialize the Metrics instance
app.metrics = Metrics(enabled=config["ENABLE_PROMETHEUS_METRICS"])


def app_sessions(app):
@@ -207,7 +209,7 @@ def metrics_endpoint():
/!\ There is no authz control on this endpoint!
In cloud-automation setups, access to this endpoint is blocked at the revproxy level.
"""
data, content_type = metrics.get_latest_metrics()
data, content_type = flask.current_app.metrics.get_latest_metrics()
return flask.Response(data, content_type=content_type)


3 changes: 1 addition & 2 deletions fence/blueprints/data/indexd.py
@@ -49,7 +49,6 @@
from fence.resources.ga4gh.passports import sync_gen3_users_authz_from_ga4gh_passports
from fence.resources.audit.utils import enable_audit_logging
from fence.utils import get_valid_expiration_from_request
from fence.metrics import metrics

from . import multipart_upload
from ...models import AssumeRoleCacheAWS, query_for_user, query_for_user_by_id
@@ -210,7 +209,7 @@ def _log_signed_url_data_info(
f"acl={acl} authz={authz} bucket={bucket} user_sub={user_sub} client_id={client_id}"
)

metrics.add_signed_url_event(
current_app.metrics.add_signed_url_event(
action,
protocol,
acl,
3 changes: 1 addition & 2 deletions fence/blueprints/login/base.py
@@ -7,7 +7,6 @@
from fence.blueprints.login.redirect import validate_redirect
from fence.config import config
from fence.errors import UserError
from fence.metrics import metrics

logger = get_logger(__name__)

@@ -134,7 +133,7 @@ def get(self):

def post_login(self, user=None, token_result=None, **kwargs):
prepare_login_log(self.idp_name)
metrics.add_login_event(
flask.current_app.metrics.add_login_event(
user_sub=flask.g.user.id,
idp=self.idp_name,
fence_idp=flask.session.get("fence_idp"),
51 changes: 33 additions & 18 deletions fence/metrics.py
@@ -1,18 +1,20 @@
"""
Metrics are collected by the Prometheus client and exposed at the `/metrics` endpoint.

This defines a class which can be extended and instantiated at the web app-level. For flask, this is
stored on the `app` object.

To add a new metric:
- Add a new method to the `Metrics` class below (see `add_login_event` and `add_signed_url_event`
for example).
- The new method should call the `_increment_counter` and/or `_set_gauge` methods with the
appropriate metric name and labels.
- Call the new method from the code where relevant, for example:
from fence.metric import metrics
metrics.add_login_event(...)
from flask import current_app
current_app.metrics.add_login_event(...)
- Add unit tests to the `tests/test_metrics` file.
"""


import os
import pathlib

@@ -26,24 +28,41 @@
CONTENT_TYPE_LATEST,
)

from fence.config import config


logger = get_logger(__name__)


class Metrics:
class Metrics(object):
"""
Class to handle Prometheus metrics
Attributes:
registry (CollectorRegistry): Prometheus registry
_registry (CollectorRegistry): Prometheus registry
metrics (dict): Dictionary to store Prometheus metrics
paulineribeyre marked this conversation as resolved.
"""

def __init__(self, prometheus_dir="/var/tmp/uwsgi_flask_metrics"):
def __init__(self, enabled=True, prometheus_dir="/var/tmp/prometheus_metrics"):
"""
Create a metrics class.

Args:
enabled (bool): If this is false, the class functions will be no-ops (no operations), effectively
doing nothing. This is the behavior when metrics are disabled. Why? So application code
doesn't have to check, it always tries to log a metric.
prometheus_dir (str): Directory to use when setting PROMETHEUS_MULTIPROC_DIR env var (which prometheus requires
for multiprocess metrics collection). Note that this the prometheus client is very
Contributor:
Suggested change
for multiprocess metrics collection). Note that this the prometheus client is very
for multiprocess metrics collection). Note that the prometheus client is very

finicky about when the ENV var is set.
"""
self.enabled = enabled
if not enabled:
return

pathlib.Path(prometheus_dir).mkdir(parents=True, exist_ok=True)
os.environ["PROMETHEUS_MULTIPROC_DIR"] = prometheus_dir

logger.info(
f"PROMETHEUS_MULTIPROC_DIR is {os.environ['PROMETHEUS_MULTIPROC_DIR']}"
)

self._registry = CollectorRegistry()
multiprocess.MultiProcessCollector(self._registry)
self._metrics = {}
@@ -67,8 +86,8 @@ def get_latest_metrics(self):
"""
# When metrics gathering is not enabled, the metrics endpoint should not error, but it should
# not return any data.
if not config["ENABLE_PROMETHEUS_METRICS"]:
return "", None
if not self.enabled:
return "", CONTENT_TYPE_LATEST

return generate_latest(self._registry), CONTENT_TYPE_LATEST

@@ -88,7 +107,7 @@ def _increment_counter(self, name, labels):
f"Creating counter '{name}' with description '{description}' and labels: {labels}"
)
self._metrics[name] = Counter(name, description, [*labels.keys()])
elif type(self._metrics[name]) != Counter:
elif type(self._metrics[name]) is not Counter:
raise ValueError(
f"Trying to create counter '{name}' but a {type(self._metrics[name])} with this name already exists"
)
@@ -113,7 +132,7 @@ def _set_gauge(self, name, labels, value):
f"Creating gauge '{name}' with description '{description}' and labels: {labels}"
)
self._metrics[name] = Gauge(name, description, [*labels.keys()])
elif type(self._metrics[name]) != Gauge:
elif type(self._metrics[name]) is not Gauge:
raise ValueError(
f"Trying to create gauge '{name}' but a {type(self._metrics[name])} with this name already exists"
)
@@ -125,7 +144,7 @@ def add_login_event(self, user_sub, idp, fence_idp, shib_idp, client_id):
"""
Record a login event
"""
if not config["ENABLE_PROMETHEUS_METRICS"]:
if not self.enabled:
return
self._increment_counter(
"gen3_fence_login",
@@ -164,7 +183,7 @@ def add_signed_url_event(
"""
Record a signed URL event
"""
if not config["ENABLE_PROMETHEUS_METRICS"]:
if not self.enabled:
return
self._increment_counter(
"gen3_fence_presigned_url",
@@ -193,7 +212,3 @@ def add_signed_url_event(
},
size_in_kibibytes,
)


# Initialize the Metrics instance
metrics = Metrics()
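The `enabled` flag design in this diff (methods become no-ops when metrics are off, so callers never check a config flag) can be sketched with a hypothetical minimal stand-in, without Flask or the prometheus client; the real methods call `_increment_counter`/`_set_gauge` instead of appending to a list:

```python
# Hypothetical stand-in for the Metrics class: when enabled=False, every
# recording method returns early, so application code always just calls it.
class Metrics:
    def __init__(self, enabled=True):
        self.enabled = enabled
        self._events = []  # stands in for real Prometheus counters

    def add_login_event(self, **labels):
        if not self.enabled:
            return  # disabled: do nothing
        self._events.append(("login", labels))

on = Metrics(enabled=True)
off = Metrics(enabled=False)
on.add_login_event(user_sub="123", idp="google")
off.add_login_event(user_sub="123", idp="google")
print(len(on._events), len(off._events))  # prints "1 0"
```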
51 changes: 29 additions & 22 deletions poetry.lock

Some generated files are not rendered by default.