From ad7ecb4ddde1e4d1e7656be5615c7c614411d669 Mon Sep 17 00:00:00 2001 From: Enol Fernandez Date: Fri, 22 Nov 2024 11:48:21 +0000 Subject: [PATCH 01/13] Add a site accounting tool --- fedcloud_vm_monitoring/accounting.py | 49 ++++++ fedcloud_vm_monitoring/data/vos.yaml | 146 ++++++++++++++++++ fedcloud_vm_monitoring/goc.py | 79 ++++++++++ fedcloud_vm_monitoring/site_accounting_cli.py | 70 +++++++++ 4 files changed, 344 insertions(+) create mode 100644 fedcloud_vm_monitoring/accounting.py create mode 100644 fedcloud_vm_monitoring/data/vos.yaml create mode 100644 fedcloud_vm_monitoring/goc.py create mode 100644 fedcloud_vm_monitoring/site_accounting_cli.py diff --git a/fedcloud_vm_monitoring/accounting.py b/fedcloud_vm_monitoring/accounting.py new file mode 100644 index 0000000..877075b --- /dev/null +++ b/fedcloud_vm_monitoring/accounting.py @@ -0,0 +1,49 @@ +"""Class for interaction with the accounting portal""" + +import numbers +import datetime +import httpx + +ACCOUNTING_URL = "https://accounting.egi.eu/" +SITE_VO_ACCOUNTING = "cloud/sum_elap_processors/SITE/VO/{start_year}/{start_month}/{end_year}/{end_month}/all/onlyinfrajobs/JSON/" + +class Accounting: + def __init__(self): + self._data = {} + + def _get_accounting_data(self): + today = datetime.date.today() + start = today - datetime.timedelta(days=90) + print(start) + url = ACCOUNTING_URL + SITE_VO_ACCOUNTING.format( + start_year=start.year, start_month=start.month, + end_year=today.year, end_month=today.month, + ) + # accounting generates a redirect here + r = httpx.get(url, follow_redirects=True) + self._data = r.json() + return self._data + + def site_vos(self, site): + if not self._data: + self._get_accounting_data() + for col in self._data: + if col["id"] == site: + return set( + [ + vo[0] + for vo in col.items() + if isinstance(vo[1], numbers.Number) + and vo[1] != 0 + and vo[0] not in ["Total", "Percent"] + ] + ) + return set([]) + + def all_sites(self): + if not self._data: + 
self._get_accounting_data() + for col in self._data: + if col["id"] == "xlegend": + return [site[1] for site in col.items() if site[0] != "id"] + return [] diff --git a/fedcloud_vm_monitoring/data/vos.yaml b/fedcloud_vm_monitoring/data/vos.yaml new file mode 100644 index 0000000..d47be82 --- /dev/null +++ b/fedcloud_vm_monitoring/data/vos.yaml @@ -0,0 +1,146 @@ +D4SCIENCE: + - d4science.org +WENMR: + - enmr.eu +OBSEA: + - vo.obsea.es +NBISBILS: + - vo.nbis.se +MSO4SC: + - imath.cesga.es +BIOISI: + - bioisi +TERRADUE: + - geohazards.terradue.com +GEODAB: + - vo.geoss.eu +CLARIN: + - vo.clarin.eu +DEIMOS: + - vo.nextgeoss.eu +EMSOERIC: + - vo.emso-eric.eu +TrainingInfrastructure: + - training.egi.eu +Notebooks: + - vo.notebooks.egi.eu +AoD: + - vo.access.egi.eu +EXTraS: + - extras-fp7.eu +Fusion: + - fusion +LSGC: + - biomed +MRILab: +Peachnote: + - peachnote.com +OPENBIOMAP: + - vo.openbiomaps.org +IIASA: +EMPHASIS: + - vo.emphasisproject.eu +EOSCSYNERGY: + - eosc-synergy.eu + - worsica.vo.incd.pt + - cryoem.instruct-eric.eu + - lagoproject.net + - umsa.certi-sc-cz + - mswss.ui.savbka.sk + - o3as.data.kit.edu +VESPA: + - vo.europlanet-vespa.eu +ECRIN: + - vo.crmdr.org +STARS4ALL: + - vo.stars4all.eu +GOSAFE: + - gosafe.eng.it +DIGITBRAIN: + - vo.digitbrain.eu +BELLE2: + - belle +OPERAS: + - vo.operas-eu.org +DEEP: + - deep-hybrid-datacloud.eu +POLICYCLOUD: + - vo.ai4publicpolicy.eu +CSCALE: + - aquamonitor.c-scale.eu + - eval.c-scale.eu + - terrascope.c-scale.eu + - waterwatch.c-scale.eu + # these below are actually removed + - HighResLandSurf.c-scale.eu + - return.c-scale.eu + - hisea.c-scale.eu + - coastmonitor.c-scale.eu + - in-sar-cubes.c-scale.eu + - plankton.c-scale.eu + - lost-salvage.c-scale.eu + - gltfca.c-scale.eu + - geohazards.c-scale.eu + - pangeo.c-scale.eu +EISCAT3D: + - eiscat.se +BINARE: + - vo.binare-oy.eu +COS4CLOUD: + - cos4cloud-eosc.eu +PEROVSKITE: + - perla-pv.ro +ENES: + - vo.enes.org +MINKE: + - minka-sdg.org +BD4NRG: + - vo.bd4nrg.eu 
+PITHIANRF: + - vo.esc.pithia.eu + - vo.pithia.eu +CEITEC: [] +PLOCAN: + - vo.plocan.eu +ENVRIFAIR: + - vo.envri-fair.eu +ICECUBE: + - icecube +MATRYCS: + - vo.matrycs.eu +OPENCOASTS: + - opencoast.eosc-hub.eu +AIIDALAB: + - vo.max-centre.eu +PANGEO: + - vo.pangeo.eu +LETHE: + - vo.lethe-project.eu +LATITUDO40: + - vo.latitudo40.com.eu +EUROSCIENCEGATEWAY: + - vo.usegalaxy.eu +CESSDA: + - vo.cessda.eduteams.org +SEADATANET: + - vo.seadatanet.org +ERIES: + - vo.eries.eu +EUROSEA: + - vo.eurosea.marine.ie +ANERIS: + - vo.aneris.eu +OPENRISKNET: + - openrisknet.org +OIPUB: + - vo.oipub.com +DECIDO: + - vo.decido-project.eu +AI4PUBLICPOLICY: + - vo.ai4publicpolicy.eu +PSMA: + - vo.radiotracers4psma.eu +EUREKA3D: + - culturalheritage.vo.egi.eu +NEURODESK: + - vo.neurodesk.eu diff --git a/fedcloud_vm_monitoring/goc.py b/fedcloud_vm_monitoring/goc.py new file mode 100644 index 0000000..15628c0 --- /dev/null +++ b/fedcloud_vm_monitoring/goc.py @@ -0,0 +1,79 @@ +"""Classes to interact with the GOCDB""" + +import re +import datetime +import httpx +import pprint +import xmltodict +import numbers +import yaml + +GOC_PUBLIC_URL = "https://goc.egi.eu/gocdbpi/public/" +GOC_PRIVATE_URL = "https://goc.egi.eu/gocdbpi/private/" +SERVICE_TYPES = ["org.openstack.nova"] +SLA_GROUP_RE = r"EGI_(.*)_SLA" + +class GOCDB: + def __init__(self): + self._cache = {} + self.queries = 0 + self.sla_vos = set() + + def get_sla_groups(self, cert_file, scope="EGI,SLA"): + client = httpx.Client(cert=cert_file) + params = {"method": "get_service_group", "scope": scope} + response = client.get(GOC_PRIVATE_URL, params=params) + self.queries += 1 + groups = xmltodict.parse(response.text)["results"]["SERVICE_GROUP"] + return groups + + def get_sites_slas(self, cert_file, vo_map): + groups = self.get_sla_groups(cert_file) + all_vos = [] + for vo in vo_map.values(): + if vo: + all_vos.extend(vo) + self.sla_vos = set(all_vos) + + sites = {} + for group in groups: + m = re.search(SLA_GROUP_RE, group["NAME"]) + 
if not m: + continue + sla_name = m.group(1) + vos = vo_map.get(sla_name) + endpoints = group.get("SERVICE_ENDPOINT", []) + if not isinstance(endpoints, list): + endpoints = [endpoints] + for endpoint in endpoints: + svc = self.get_endpoint_site(endpoint) + if svc: + for site in svc["SITENAME"]: + site_info = sites.get("site", dict()) + site_info[sla_name] = { + "vos": set(vos or []) + } + sites[site] = site_info + return sites + + def get_endpoint_site(self, endpoint): + key = endpoint["@PRIMARY_KEY"] + service = {} + if key in self._cache: + return self._cache[key] + if endpoint.get("SERVICE_TYPE", "") not in SERVICE_TYPES: + return None + params = {"method": "get_service"} + if "HOSTNAME" in endpoint: + params["hostname"] = endpoint["HOSTNAME"] + if "SERVICE_TYPE" in endpoint: + params["service_type"] = endpoint["SERVICE_TYPE"] + r = httpx.get(GOC_PUBLIC_URL, params=params) + self.queries += 1 + if r.text: + results = xmltodict.parse(r.text).get("results", {}) + if results: + service = results.get("SERVICE_ENDPOINT", {}) + if service: + self._cache[key] = service + return service diff --git a/fedcloud_vm_monitoring/site_accounting_cli.py b/fedcloud_vm_monitoring/site_accounting_cli.py new file mode 100644 index 0000000..716830e --- /dev/null +++ b/fedcloud_vm_monitoring/site_accounting_cli.py @@ -0,0 +1,70 @@ +"""Monitor Accounting status""" + +import click +import importlib +from fedcloud_vm_monitoring.appdb import AppDB +from fedcloud_vm_monitoring.site_monitor import SiteMonitor, SiteMonitorException +from fedcloudclient.decorators import oidc_params +from fedcloudclient.sites import list_sites + +import yaml + +from fedcloud_vm_monitoring.accounting import Accounting +from fedcloud_vm_monitoring.goc import GOCDB + +def check_site_slas(site, site_slas, goc, acct): + click.echo(f"[-] Checking accounting for site {site}") + sla_vos = set() + if site not in site_slas: + click.echo(f"[I] {site} is not present in any SLA") + else: + for sla_name, sla in 
site_slas[site].items(): + sla_vos = sla_vos.union(sla["vos"]) + accounted_vos = sla["vos"].intersection(acct.site_vos(site)) + if accounted_vos: + click.echo( + f"[OK] SITE {site} has accouting info for SLA {sla_name} ({accounted_vos})" + ) + else: + click.echo(f"[ERR] SITE {site} has no accouting info for SLA {sla_name}") + click.echo("[-] Checking aditional VOs") + # Now check which VOs are being reported without a SLA + if not sla_vos: + sla_vos = goc.sla_vos + non_sla_vos = acct.site_vos(site) - sla_vos.union(set(["ops"])) + if non_sla_vos: + click.echo( + f"[W] Site {site} has accounting for VOs {non_sla_vos}, non covered by SLA" + ) + if "ops" not in acct.site_vos(site): + click.echo( + f"[W] SITE {site} has accounting for ops" + ) + + + +@click.command() +@click.option("--site", help="Site to check") +@click.option("--user-cert", required=True, help="User certificate (for GOCDB queries)") +@click.option("--vo-map-file", help="SLA-VO mapping file") +def main( + site, + user_cert, + vo_map_file, +): + if vo_map_file: + with open(vo_map_file) as f: + vo_map_src = f.read() + else: + vo_map_src = importlib.resources.read_text("fedcloud_vm_monitoring.data", "vos.yaml") + vo_map = yaml.load(vo_map_src, Loader=yaml.SafeLoader) + acct = Accounting() + goc = GOCDB() + slas = goc.get_sites_slas(user_cert, vo_map) + click.echo("[-] Checking accounting over the last month...") + + if site: + check_site_slas(site, slas, goc, acct) + else: + for site in acct.all_sites(): + check_site_slas(site, slas, goc, acct) From ff9d587aaa520c089cf736a05677658bc5865a24 Mon Sep 17 00:00:00 2001 From: Enol Fernandez Date: Fri, 22 Nov 2024 11:48:46 +0000 Subject: [PATCH 02/13] Renamed cli to vm_monitor_cli to avoid conflicts --- fedcloud_vm_monitoring/{cli.py => vm_monitor_cli.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename fedcloud_vm_monitoring/{cli.py => vm_monitor_cli.py} (100%) diff --git a/fedcloud_vm_monitoring/cli.py b/fedcloud_vm_monitoring/vm_monitor_cli.py 
similarity index 100% rename from fedcloud_vm_monitoring/cli.py rename to fedcloud_vm_monitoring/vm_monitor_cli.py From 0283a10c422540d5c5c90c3cd8c4e7a7949f0549 Mon Sep 17 00:00:00 2001 From: Enol Fernandez Date: Fri, 22 Nov 2024 11:49:04 +0000 Subject: [PATCH 03/13] Add new depedencies and tool --- pyproject.toml | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index bcba9f9..d71de3b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -5,9 +5,14 @@ description = "Monitoring fedcloud VMs and sites" authors = ["Giuseppe La Rocca ", "Enol Fernandez "] readme = "README.md" +include = [ + { path = "fedcloud_vm_monitoring/data", format = ["sdist", "wheel"] } +] [tool.poetry.scripts] -fedcloud-vo-monitor = "fedcloud_vm_monitoring.cli:main" +fedcloud-vo-monitor = "fedcloud_vm_monitoring.vm_monitor_cli:main" +fedcloud-site-accountoung = "fedcloud_vm_monitoring.site_accounting_cli:main" + [tool.poetry.dependencies] python = "^3.9" @@ -15,6 +20,8 @@ fedcloudclient = "^1.4.3" ldap3 = "^2.9.1" python-dateutil = "^2.9.0.post0" paramiko = "^3.4.0" +httpx = "^0.27.2" +xmltodict = "^0.14.2" [build-system] requires = ["poetry-core"] From 664374cdcef20752770c8dee209a618053d8570b Mon Sep 17 00:00:00 2001 From: Enol Fernandez Date: Fri, 22 Nov 2024 11:49:22 +0000 Subject: [PATCH 04/13] Lock dependencies --- poetry.lock | 119 +++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 117 insertions(+), 2 deletions(-) diff --git a/poetry.lock b/poetry.lock index a03427d..c27a0ec 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,26 @@ -# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.8.2 and should not be changed by hand. 
+ +[[package]] +name = "anyio" +version = "4.6.2.post1" +description = "High level compatibility layer for multiple asynchronous event loop implementations" +optional = false +python-versions = ">=3.9" +files = [ + {file = "anyio-4.6.2.post1-py3-none-any.whl", hash = "sha256:6d170c36fba3bdd840c73d3868c1e777e33676a69c3a72cf0a0d5d6d8009b61d"}, + {file = "anyio-4.6.2.post1.tar.gz", hash = "sha256:4c8bc31ccdb51c7f7bd251f51c609e038d63e34219b44aa86e47576389880b4c"}, +] + +[package.dependencies] +exceptiongroup = {version = ">=1.0.2", markers = "python_version < \"3.11\""} +idna = ">=2.8" +sniffio = ">=1.1" +typing-extensions = {version = ">=4.1", markers = "python_version < \"3.11\""} + +[package.extras] +doc = ["Sphinx (>=7.4,<8.0)", "packaging", "sphinx-autodoc-typehints (>=1.2.0)", "sphinx-rtd-theme"] +test = ["anyio[trio]", "coverage[toml] (>=7)", "exceptiongroup (>=1.2.0)", "hypothesis (>=4.0)", "psutil (>=5.9)", "pytest (>=7.0)", "pytest-mock (>=3.6.1)", "trustme", "truststore (>=0.9.1)", "uvloop (>=0.21.0b1)"] +trio = ["trio (>=0.26.1)"] [[package]] name = "attrs" @@ -444,6 +466,20 @@ typing_extensions = {version = ">=4.0.1", markers = "python_version < \"3.11\""} [package.extras] pifpaf = ["pifpaf (>=2.5.0)", "setuptools"] +[[package]] +name = "exceptiongroup" +version = "1.2.2" +description = "Backport of PEP 654 (exception groups)" +optional = false +python-versions = ">=3.7" +files = [ + {file = "exceptiongroup-1.2.2-py3-none-any.whl", hash = "sha256:3111b9d131c238bec2f8f516e123e14ba243563fb135d3fe885990585aa7795b"}, + {file = "exceptiongroup-1.2.2.tar.gz", hash = "sha256:47c2edf7c6738fafb49fd34290706d1a1a2f4d1c6df275526b62cbb4aa5393cc"}, +] + +[package.extras] +test = ["pytest (>=6)"] + [[package]] name = "fedcloudclient" version = "1.4.3" @@ -472,6 +508,63 @@ requests = "2.31.0" setuptools = "68.2.2" tabulate = "0.9.0" +[[package]] +name = "h11" +version = "0.14.0" +description = "A pure-Python, bring-your-own-I/O implementation of HTTP/1.1" +optional = 
false +python-versions = ">=3.7" +files = [ + {file = "h11-0.14.0-py3-none-any.whl", hash = "sha256:e3fe4ac4b851c468cc8363d500db52c2ead036020723024a109d37346efaa761"}, + {file = "h11-0.14.0.tar.gz", hash = "sha256:8f19fbbe99e72420ff35c00b27a34cb9937e902a8b810e2c88300c6f0a3b699d"}, +] + +[[package]] +name = "httpcore" +version = "1.0.7" +description = "A minimal low-level HTTP client." +optional = false +python-versions = ">=3.8" +files = [ + {file = "httpcore-1.0.7-py3-none-any.whl", hash = "sha256:a3fff8f43dc260d5bd363d9f9cf1830fa3a458b332856f34282de498ed420edd"}, + {file = "httpcore-1.0.7.tar.gz", hash = "sha256:8551cb62a169ec7162ac7be8d4817d561f60e08eaa485234898414bb5a8a0b4c"}, +] + +[package.dependencies] +certifi = "*" +h11 = ">=0.13,<0.15" + +[package.extras] +asyncio = ["anyio (>=4.0,<5.0)"] +http2 = ["h2 (>=3,<5)"] +socks = ["socksio (==1.*)"] +trio = ["trio (>=0.22.0,<1.0)"] + +[[package]] +name = "httpx" +version = "0.27.2" +description = "The next generation HTTP client." +optional = false +python-versions = ">=3.8" +files = [ + {file = "httpx-0.27.2-py3-none-any.whl", hash = "sha256:7bb2708e112d8fdd7829cd4243970f0c223274051cb35ee80c03301ee29a3df0"}, + {file = "httpx-0.27.2.tar.gz", hash = "sha256:f7c2be1d2f3c3c3160d441802406b206c2b76f5947b11115e6df10c6c65e66c2"}, +] + +[package.dependencies] +anyio = "*" +certifi = "*" +httpcore = "==1.*" +idna = "*" +sniffio = "*" + +[package.extras] +brotli = ["brotli", "brotlicffi"] +cli = ["click (==8.*)", "pygments (==2.*)", "rich (>=10,<14)"] +http2 = ["h2 (>=3,<5)"] +socks = ["socksio (==1.*)"] +zstd = ["zstandard (>=0.18.0)"] + [[package]] name = "hvac" version = "2.0.0" @@ -1502,6 +1595,17 @@ files = [ {file = "six-1.16.0.tar.gz", hash = "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926"}, ] +[[package]] +name = "sniffio" +version = "1.3.1" +description = "Sniff out which async library your code is running under" +optional = false +python-versions = ">=3.7" +files = [ + {file = 
"sniffio-1.3.1-py3-none-any.whl", hash = "sha256:2f6da418d1f1e0fddd844478f41680e794e6051915791a034ff65e5f100525a2"}, + {file = "sniffio-1.3.1.tar.gz", hash = "sha256:f4324edc670a0f49750a81b895f35c3adb843cca46f0530f79fc1babb23789dc"}, +] + [[package]] name = "stevedore" version = "5.3.0" @@ -1659,6 +1763,17 @@ files = [ {file = "wrapt-1.16.0.tar.gz", hash = "sha256:5f370f952971e7d17c7d1ead40e49f32345a7f7a5373571ef44d800d06b1899d"}, ] +[[package]] +name = "xmltodict" +version = "0.14.2" +description = "Makes working with XML feel like you are working with JSON" +optional = false +python-versions = ">=3.6" +files = [ + {file = "xmltodict-0.14.2-py2.py3-none-any.whl", hash = "sha256:20cc7d723ed729276e808f26fb6b3599f786cbc37e06c65e192ba77c40f20aac"}, + {file = "xmltodict-0.14.2.tar.gz", hash = "sha256:201e7c28bb210e374999d1dde6382923ab0ed1a8a5faeece48ab525b7810a553"}, +] + [[package]] name = "zipp" version = "3.20.0" @@ -1677,4 +1792,4 @@ test = ["big-O", "importlib-resources", "jaraco.functools", "jaraco.itertools", [metadata] lock-version = "2.0" python-versions = "^3.9" -content-hash = "2043f2b11dfea1d6cbc783d2a222cd09cf6424c89793177290df798f414856de" +content-hash = "20cd29c64145eeddcb0bef316cf96218b42c61ad5c4ae5cf2cb53e23f42ed539" From 827963add11b05bf0341462e867abbf5623cdad3 Mon Sep 17 00:00:00 2001 From: Enol Fernandez Date: Fri, 22 Nov 2024 12:22:17 +0000 Subject: [PATCH 05/13] Linting fixes --- fedcloud_vm_monitoring/accounting.py | 10 ++++--- fedcloud_vm_monitoring/goc.py | 12 ++++----- fedcloud_vm_monitoring/site_accounting_cli.py | 26 ++++++++++--------- 3 files changed, 27 insertions(+), 21 deletions(-) diff --git a/fedcloud_vm_monitoring/accounting.py b/fedcloud_vm_monitoring/accounting.py index 877075b..3659103 100644 --- a/fedcloud_vm_monitoring/accounting.py +++ b/fedcloud_vm_monitoring/accounting.py @@ -1,12 +1,14 @@ """Class for interaction with the accounting portal""" -import numbers import datetime +import numbers + import httpx ACCOUNTING_URL = 
"https://accounting.egi.eu/" SITE_VO_ACCOUNTING = "cloud/sum_elap_processors/SITE/VO/{start_year}/{start_month}/{end_year}/{end_month}/all/onlyinfrajobs/JSON/" + class Accounting: def __init__(self): self._data = {} @@ -16,8 +18,10 @@ def _get_accounting_data(self): start = today - datetime.timedelta(days=90) print(start) url = ACCOUNTING_URL + SITE_VO_ACCOUNTING.format( - start_year=start.year, start_month=start.month, - end_year=today.year, end_month=today.month, + start_year=start.year, + start_month=start.month, + end_year=today.year, + end_month=today.month, ) # accounting generates a redirect here r = httpx.get(url, follow_redirects=True) diff --git a/fedcloud_vm_monitoring/goc.py b/fedcloud_vm_monitoring/goc.py index 15628c0..2f6ca8a 100644 --- a/fedcloud_vm_monitoring/goc.py +++ b/fedcloud_vm_monitoring/goc.py @@ -1,11 +1,12 @@ """Classes to interact with the GOCDB""" -import re import datetime -import httpx +import numbers +import re import pprint + +import httpx import xmltodict -import numbers import yaml GOC_PUBLIC_URL = "https://goc.egi.eu/gocdbpi/public/" @@ -13,6 +14,7 @@ SERVICE_TYPES = ["org.openstack.nova"] SLA_GROUP_RE = r"EGI_(.*)_SLA" + class GOCDB: def __init__(self): self._cache = {} @@ -50,9 +52,7 @@ def get_sites_slas(self, cert_file, vo_map): if svc: for site in svc["SITENAME"]: site_info = sites.get("site", dict()) - site_info[sla_name] = { - "vos": set(vos or []) - } + site_info[sla_name] = {"vos": set(vos or [])} sites[site] = site_info return sites diff --git a/fedcloud_vm_monitoring/site_accounting_cli.py b/fedcloud_vm_monitoring/site_accounting_cli.py index 716830e..bf962e9 100644 --- a/fedcloud_vm_monitoring/site_accounting_cli.py +++ b/fedcloud_vm_monitoring/site_accounting_cli.py @@ -1,21 +1,22 @@ """Monitor Accounting status""" -import click import importlib + +import click +import yaml + +from fedcloud_vm_monitoring.accounting import Accounting from fedcloud_vm_monitoring.appdb import AppDB +from fedcloud_vm_monitoring.goc 
import GOCDB from fedcloud_vm_monitoring.site_monitor import SiteMonitor, SiteMonitorException from fedcloudclient.decorators import oidc_params from fedcloudclient.sites import list_sites -import yaml - -from fedcloud_vm_monitoring.accounting import Accounting -from fedcloud_vm_monitoring.goc import GOCDB def check_site_slas(site, site_slas, goc, acct): click.echo(f"[-] Checking accounting for site {site}") sla_vos = set() - if site not in site_slas: + if site not in site_slas: click.echo(f"[I] {site} is not present in any SLA") else: for sla_name, sla in site_slas[site].items(): @@ -26,7 +27,9 @@ def check_site_slas(site, site_slas, goc, acct): f"[OK] SITE {site} has accouting info for SLA {sla_name} ({accounted_vos})" ) else: - click.echo(f"[ERR] SITE {site} has no accouting info for SLA {sla_name}") + click.echo( + f"[ERR] SITE {site} has no accouting info for SLA {sla_name}" + ) click.echo("[-] Checking aditional VOs") # Now check which VOs are being reported without a SLA if not sla_vos: @@ -37,10 +40,7 @@ def check_site_slas(site, site_slas, goc, acct): f"[W] Site {site} has accounting for VOs {non_sla_vos}, non covered by SLA" ) if "ops" not in acct.site_vos(site): - click.echo( - f"[W] SITE {site} has accounting for ops" - ) - + click.echo(f"[W] SITE {site} has accounting for ops") @click.command() @@ -56,7 +56,9 @@ def main( with open(vo_map_file) as f: vo_map_src = f.read() else: - vo_map_src = importlib.resources.read_text("fedcloud_vm_monitoring.data", "vos.yaml") + vo_map_src = importlib.resources.read_text( + "fedcloud_vm_monitoring.data", "vos.yaml" + ) vo_map = yaml.load(vo_map_src, Loader=yaml.SafeLoader) acct = Accounting() goc = GOCDB() From d2d21652cda9a4525ed59a4103a384443416ea06 Mon Sep 17 00:00:00 2001 From: Enol Fernandez Date: Fri, 22 Nov 2024 12:46:50 +0000 Subject: [PATCH 06/13] Linting fixes --- fedcloud_vm_monitoring/accounting.py | 4 +++- fedcloud_vm_monitoring/goc.py | 4 ---- fedcloud_vm_monitoring/site_accounting_cli.py | 4 ---- 3 
files changed, 3 insertions(+), 9 deletions(-) diff --git a/fedcloud_vm_monitoring/accounting.py b/fedcloud_vm_monitoring/accounting.py index 3659103..e1ab802 100644 --- a/fedcloud_vm_monitoring/accounting.py +++ b/fedcloud_vm_monitoring/accounting.py @@ -6,7 +6,9 @@ import httpx ACCOUNTING_URL = "https://accounting.egi.eu/" -SITE_VO_ACCOUNTING = "cloud/sum_elap_processors/SITE/VO/{start_year}/{start_month}/{end_year}/{end_month}/all/onlyinfrajobs/JSON/" +SITE_VO_ACCOUNTING = ("cloud/sum_elap_processors/SITE/VO/" + "{start_year}/{start_month}/{end_year}/{end_month}" + "/all/onlyinfrajobs/JSON/") class Accounting: diff --git a/fedcloud_vm_monitoring/goc.py b/fedcloud_vm_monitoring/goc.py index 2f6ca8a..b313da1 100644 --- a/fedcloud_vm_monitoring/goc.py +++ b/fedcloud_vm_monitoring/goc.py @@ -1,13 +1,9 @@ """Classes to interact with the GOCDB""" -import datetime -import numbers import re -import pprint import httpx import xmltodict -import yaml GOC_PUBLIC_URL = "https://goc.egi.eu/gocdbpi/public/" GOC_PRIVATE_URL = "https://goc.egi.eu/gocdbpi/private/" diff --git a/fedcloud_vm_monitoring/site_accounting_cli.py b/fedcloud_vm_monitoring/site_accounting_cli.py index bf962e9..2fb44d9 100644 --- a/fedcloud_vm_monitoring/site_accounting_cli.py +++ b/fedcloud_vm_monitoring/site_accounting_cli.py @@ -6,11 +6,7 @@ import yaml from fedcloud_vm_monitoring.accounting import Accounting -from fedcloud_vm_monitoring.appdb import AppDB from fedcloud_vm_monitoring.goc import GOCDB -from fedcloud_vm_monitoring.site_monitor import SiteMonitor, SiteMonitorException -from fedcloudclient.decorators import oidc_params -from fedcloudclient.sites import list_sites def check_site_slas(site, site_slas, goc, acct): From b7200159eb4a5bbaa1bad30aa1e7cf5dd5f0c485 Mon Sep 17 00:00:00 2001 From: Enol Fernandez Date: Fri, 22 Nov 2024 12:58:11 +0000 Subject: [PATCH 07/13] More linting fixes --- fedcloud_vm_monitoring/accounting.py | 8 +++++--- fedcloud_vm_monitoring/site_accounting_cli.py | 1 - 2 
files changed, 5 insertions(+), 4 deletions(-) diff --git a/fedcloud_vm_monitoring/accounting.py b/fedcloud_vm_monitoring/accounting.py index e1ab802..adc8427 100644 --- a/fedcloud_vm_monitoring/accounting.py +++ b/fedcloud_vm_monitoring/accounting.py @@ -6,9 +6,11 @@ import httpx ACCOUNTING_URL = "https://accounting.egi.eu/" -SITE_VO_ACCOUNTING = ("cloud/sum_elap_processors/SITE/VO/" - "{start_year}/{start_month}/{end_year}/{end_month}" - "/all/onlyinfrajobs/JSON/") +SITE_VO_ACCOUNTING = ( + "cloud/sum_elap_processors/SITE/VO/" + "{start_year}/{start_month}/{end_year}/{end_month}" + "/all/onlyinfrajobs/JSON/" +) class Accounting: diff --git a/fedcloud_vm_monitoring/site_accounting_cli.py b/fedcloud_vm_monitoring/site_accounting_cli.py index 2fb44d9..df86ecb 100644 --- a/fedcloud_vm_monitoring/site_accounting_cli.py +++ b/fedcloud_vm_monitoring/site_accounting_cli.py @@ -4,7 +4,6 @@ import click import yaml - from fedcloud_vm_monitoring.accounting import Accounting from fedcloud_vm_monitoring.goc import GOCDB From 1323a11e6478a9d31b7350f7b7f1b8be8c5b6a8c Mon Sep 17 00:00:00 2001 From: Enol Fernandez Date: Tue, 26 Nov 2024 10:31:27 +0000 Subject: [PATCH 08/13] Rename file --- .../{site_accounting_cli.py => site_sla_cli.py} | 0 pyproject.toml | 2 +- 2 files changed, 1 insertion(+), 1 deletion(-) rename fedcloud_vm_monitoring/{site_accounting_cli.py => site_sla_cli.py} (100%) diff --git a/fedcloud_vm_monitoring/site_accounting_cli.py b/fedcloud_vm_monitoring/site_sla_cli.py similarity index 100% rename from fedcloud_vm_monitoring/site_accounting_cli.py rename to fedcloud_vm_monitoring/site_sla_cli.py diff --git a/pyproject.toml b/pyproject.toml index d71de3b..921eaa8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -11,7 +11,7 @@ include = [ [tool.poetry.scripts] fedcloud-vo-monitor = "fedcloud_vm_monitoring.vm_monitor_cli:main" -fedcloud-site-accountoung = "fedcloud_vm_monitoring.site_accounting_cli:main" +fedcloud-site-sla = 
"fedcloud_vm_monitoring.site_sla_cli:main" [tool.poetry.dependencies] From 1daac304454f41695be33ce72b40fb4d6e638d7b Mon Sep 17 00:00:00 2001 From: Enol Fernandez Date: Tue, 26 Nov 2024 11:07:00 +0000 Subject: [PATCH 09/13] Add AppDB as source of info --- README.md | 4 +-- fedcloud_vm_monitoring/accounting.py | 3 +- fedcloud_vm_monitoring/appdb.py | 39 +++++++++++++++++++----- fedcloud_vm_monitoring/goc.py | 8 ++--- fedcloud_vm_monitoring/site_sla_cli.py | 31 +++++++++++++++---- fedcloud_vm_monitoring/vm_monitor_cli.py | 4 +-- 6 files changed, 66 insertions(+), 23 deletions(-) diff --git a/README.md b/README.md index b2b3afe..48fb30a 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ -# fedcloud-vm-monitoring +# fedcloud-monitoring-tools -This repository contains a Python tool to monitor usage of EGI FedCloud +This repository contains a set of Python tools to monitor usage of EGI FedCloud providers and remove long-running instances. The clients work with OpenStack cloud providers supporting the OIDC protocol. 
diff --git a/fedcloud_vm_monitoring/accounting.py b/fedcloud_vm_monitoring/accounting.py index adc8427..f06c563 100644 --- a/fedcloud_vm_monitoring/accounting.py +++ b/fedcloud_vm_monitoring/accounting.py @@ -12,15 +12,14 @@ "/all/onlyinfrajobs/JSON/" ) - class Accounting: def __init__(self): self._data = {} def _get_accounting_data(self): + """Gets accounting data for sites / vos over the last 90 days""" today = datetime.date.today() start = today - datetime.timedelta(days=90) - print(start) url = ACCOUNTING_URL + SITE_VO_ACCOUNTING.format( start_year=start.year, start_month=start.month, diff --git a/fedcloud_vm_monitoring/appdb.py b/fedcloud_vm_monitoring/appdb.py index ec58a16..a2ccb0c 100644 --- a/fedcloud_vm_monitoring/appdb.py +++ b/fedcloud_vm_monitoring/appdb.py @@ -2,7 +2,7 @@ import requests -site_query = """ +sites_supporting_vo_query = """ { sites(filter: {cloudComputingShares: {VO: {eq: "%s"}}}) { items { @@ -11,16 +11,28 @@ } }""" +vos_in_site_query = """ +{ + sites(filter: {name: {eq: "%s"}}) { + items { + cloudComputingShares { + items { + VO + } + } + } + } +}""" + class AppDB: graphql_url = "https://is.appdb.egi.eu/graphql" - def __init__(self, vo): - self.vo = vo + def __init__(self): self.sites = {} - def get_sites_for_vo(self): - params = {"query": site_query % self.vo} + def get_sites_for_vo(self, vo): + params = {"query": sites_supporting_vo_query % vo} r = requests.get( self.graphql_url, params=params, headers={"accept": "application/json"} ) @@ -28,7 +40,20 @@ def get_sites_for_vo(self): data = r.json()["data"]["sites"]["items"] return [i["name"] for i in data] - def vo_check(self, site): + def vo_check(self, site, vo): if not self.sites: - self.sites = self.get_sites_for_vo() + self.sites = self.get_sites_for_vo(vo) return site in self.sites + + def get_vo_for_site(self, site): + params = {"query": vos_in_site_query % site} + r = requests.get( + self.graphql_url, params=params, headers={"accept": "application/json"} + ) + 
r.raise_for_status() + sites_items = r.json()["data"]["sites"]["items"] + if sites_items: + data = sites_items.pop()["cloudComputingShares"]["items"] + else: + return [] + return [i["VO"] for i in data] diff --git a/fedcloud_vm_monitoring/goc.py b/fedcloud_vm_monitoring/goc.py index b313da1..e18ebd6 100644 --- a/fedcloud_vm_monitoring/goc.py +++ b/fedcloud_vm_monitoring/goc.py @@ -46,10 +46,10 @@ def get_sites_slas(self, cert_file, vo_map): for endpoint in endpoints: svc = self.get_endpoint_site(endpoint) if svc: - for site in svc["SITENAME"]: - site_info = sites.get("site", dict()) - site_info[sla_name] = {"vos": set(vos or [])} - sites[site] = site_info + site = svc["SITENAME"] + site_info = sites.get("site", dict()) + site_info[sla_name] = {"vos": set(vos or [])} + sites[site] = site_info return sites def get_endpoint_site(self, endpoint): diff --git a/fedcloud_vm_monitoring/site_sla_cli.py b/fedcloud_vm_monitoring/site_sla_cli.py index df86ecb..e53f488 100644 --- a/fedcloud_vm_monitoring/site_sla_cli.py +++ b/fedcloud_vm_monitoring/site_sla_cli.py @@ -5,12 +5,14 @@ import click import yaml from fedcloud_vm_monitoring.accounting import Accounting +from fedcloud_vm_monitoring.appdb import AppDB from fedcloud_vm_monitoring.goc import GOCDB -def check_site_slas(site, site_slas, goc, acct): - click.echo(f"[-] Checking accounting for site {site}") +def check_site_slas(site, site_slas, goc, acct, appdb): + click.echo(f"[-] Checking site {site}") sla_vos = set() + appdb_vos = set(appdb.get_vo_for_site(site)) if site not in site_slas: click.echo(f"[I] {site} is not present in any SLA") else: @@ -25,6 +27,15 @@ def check_site_slas(site, site_slas, goc, acct): click.echo( f"[ERR] SITE {site} has no accouting info for SLA {sla_name}" ) + info_vos = sla["vos"].intersection(appdb_vos) + if info_vos: + click.echo( + f"[OK] SITE {site} has configured {info_vos} for SLA {sla_name}" + ) + else: + click.echo( + f"[ERR] SITE {site} has no configured VO for SLA {sla_name}" + ) 
click.echo("[-] Checking aditional VOs") # Now check which VOs are being reported without a SLA if not sla_vos: @@ -32,10 +43,17 @@ def check_site_slas(site, site_slas, goc, acct): non_sla_vos = acct.site_vos(site) - sla_vos.union(set(["ops"])) if non_sla_vos: click.echo( - f"[W] Site {site} has accounting for VOs {non_sla_vos}, non covered by SLA" + f"[W] Site {site} has accounting for VOs {non_sla_vos} but non covered by SLA" ) if "ops" not in acct.site_vos(site): click.echo(f"[W] SITE {site} has accounting for ops") + non_sla_appdb_vos = appdb_vos - sla_vos.union(set(["ops"])) + if non_sla_vos: + click.echo( + f"[W] Site {site} has VOs {non_sla_appdb_vos} configured but non covered by SLA" + ) + if "ops" not in appdb_vos: + click.echo(f"[W] SITE {site} has no configuration for ops") @click.command() @@ -57,11 +75,12 @@ def main( vo_map = yaml.load(vo_map_src, Loader=yaml.SafeLoader) acct = Accounting() goc = GOCDB() + appdb = AppDB() slas = goc.get_sites_slas(user_cert, vo_map) - click.echo("[-] Checking accounting over the last month...") + print(slas) if site: - check_site_slas(site, slas, goc, acct) + check_site_slas(site, slas, goc, acct, appdb) else: for site in acct.all_sites(): - check_site_slas(site, slas, goc, acct) + check_site_slas(site, slas, goc, acct, appdb) diff --git a/fedcloud_vm_monitoring/vm_monitor_cli.py b/fedcloud_vm_monitoring/vm_monitor_cli.py index 6934165..992ca65 100644 --- a/fedcloud_vm_monitoring/vm_monitor_cli.py +++ b/fedcloud_vm_monitoring/vm_monitor_cli.py @@ -93,8 +93,8 @@ def main( "search_filter": ldap_search_filter, } ) - appdb = AppDB(vo) - appdb_sites = appdb.get_sites_for_vo() + appdb = AppDB() + appdb_sites = appdb.get_sites_for_vo(vo) fedcloudclient_sites = list_sites(vo) sites = [site] if site else set(appdb_sites + fedcloudclient_sites) for s in sites: From c15807174dcd08d26983e618c391fca51d34b5d8 Mon Sep 17 00:00:00 2001 From: Enol Fernandez Date: Tue, 26 Nov 2024 11:09:53 +0000 Subject: [PATCH 10/13] Black fixes --- 
fedcloud_vm_monitoring/accounting.py | 1 + fedcloud_vm_monitoring/site_sla_cli.py | 4 +--- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/fedcloud_vm_monitoring/accounting.py b/fedcloud_vm_monitoring/accounting.py index f06c563..307fcfc 100644 --- a/fedcloud_vm_monitoring/accounting.py +++ b/fedcloud_vm_monitoring/accounting.py @@ -12,6 +12,7 @@ "/all/onlyinfrajobs/JSON/" ) + class Accounting: def __init__(self): self._data = {} diff --git a/fedcloud_vm_monitoring/site_sla_cli.py b/fedcloud_vm_monitoring/site_sla_cli.py index e53f488..a388ce9 100644 --- a/fedcloud_vm_monitoring/site_sla_cli.py +++ b/fedcloud_vm_monitoring/site_sla_cli.py @@ -33,9 +33,7 @@ def check_site_slas(site, site_slas, goc, acct, appdb): f"[OK] SITE {site} has configured {info_vos} for SLA {sla_name}" ) else: - click.echo( - f"[ERR] SITE {site} has no configured VO for SLA {sla_name}" - ) + click.echo(f"[ERR] SITE {site} has no configured VO for SLA {sla_name}") click.echo("[-] Checking aditional VOs") # Now check which VOs are being reported without a SLA if not sla_vos: From 813739b064ea37234f1ada744085db16767ce483 Mon Sep 17 00:00:00 2001 From: Enol Fernandez Date: Fri, 29 Nov 2024 16:46:58 +0000 Subject: [PATCH 11/13] Minor readme update --- README.md | 40 +++++++++++++++++++++++++++++----------- 1 file changed, 29 insertions(+), 11 deletions(-) diff --git a/README.md b/README.md index 48fb30a..5aa2032 100644 --- a/README.md +++ b/README.md @@ -1,17 +1,12 @@ # fedcloud-monitoring-tools This repository contains a set of Python tools to monitor usage of EGI FedCloud -providers and remove long-running instances. The clients work with OpenStack -cloud providers supporting the OIDC protocol. +providers. The clients work with OpenStack cloud providers supporting the OIDC +protocol. 
## Requirements - Python v3.9+ -- A Check-in account member of the VOs to be monitored -- For getting the EGI user identity, cloud providers have to enable the - `"identity:get_user"` API call for the user (see - [VO auditing](https://docs.egi.eu/providers/cloud-compute/openstack/aai/#vo-auditing) - for more information) ## Installation @@ -25,7 +20,20 @@ Some sites use certificates issued by certificate authorities that are not included in the default OS distribution, if you find SSL errors, please [install the EGI Core Trust Anchors certificates](https://fedcloudclient.fedcloud.eu/install.html#installing-egi-core-trust-anchor-certificates) -## Running the monitor +## fedcloud-vo-monitor + +`fedcloud-vo-monitor` checks the usage of a VO (e.g. running VMs, floating IPs +allocated, security groups) and identifies potential issues in the running VMs. + +### Requirements + +- A Check-in account member of the VOs to be monitored +- For getting the EGI user identity, cloud providers have to enable the + `"identity:get_user"` API call for the user (see + [VO auditing](https://docs.egi.eu/providers/cloud-compute/openstack/aai/#vo-auditing) + for more information) + +### Running the monitor For running the tool, you just need a [valid Check-in token](https://docs.egi.eu/users/aai/check-in/obtaining-tokens/), @@ -47,8 +55,8 @@ You can tune the behavior with the following parameters: - `--show-quotas BOOLEAN`: whether to show quotas for the VO or not (default: `True`) - `--check-ssh BOOLEAN`: Check SSH version on target VMs (default: `False`) -- `--check-cups BOOLEAN`: Check whether TCP/UDP port 631 is accessible - (default: `False`) +- `--check-cups BOOLEAN`: Check whether TCP/UDP port 631 is accessible (default: + `False`) If you have access to [Check-in LDAP](https://docs.egi.eu/users/aai/check-in/vos/#ldap) for VO @@ -60,7 +68,7 @@ membership, you can specify the settings with the following options: The `ldap-server`, `ldap-base-dn` and `ldap-search-filter`, can further 
tune the usage of LDAP, but should work for most cases without changes. -### Sample output +#### Sample output ```shell $ fedcloud-vo-monitor --vo cloud.egi.eu @@ -193,6 +201,16 @@ Getting VMs information [####################################] 100% [-] WARNING: Less than 3 security groups per instance ``` +## fedcloud-sla-monitor + +`fedcloud-sla-monitor` checks the configuration of sites supporting SLAs. It +compares the reported usage in the accounting portal and the information +retrieved from the cloud-info-provider and reports any deviations. + +### Requirements + +- An IGTF certificate to query GOCDB SLA lists + ## Useful links - [OpenStack API](https://docs.openstack.org/api-ref/) From 47c8218b5bde6d50ae5582d76007e133478ed23e Mon Sep 17 00:00:00 2001 From: Enol Fernandez Date: Fri, 29 Nov 2024 16:49:47 +0000 Subject: [PATCH 12/13] Name things consistently --- fedcloud_vm_monitoring/{site_sla_cli.py => sla_monitor_cli.py} | 0 pyproject.toml | 2 +- 2 files changed, 1 insertion(+), 1 deletion(-) rename fedcloud_vm_monitoring/{site_sla_cli.py => sla_monitor_cli.py} (100%) diff --git a/fedcloud_vm_monitoring/site_sla_cli.py b/fedcloud_vm_monitoring/sla_monitor_cli.py similarity index 100% rename from fedcloud_vm_monitoring/site_sla_cli.py rename to fedcloud_vm_monitoring/sla_monitor_cli.py diff --git a/pyproject.toml b/pyproject.toml index 921eaa8..f3c54b3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -11,7 +11,7 @@ include = [ [tool.poetry.scripts] fedcloud-vo-monitor = "fedcloud_vm_monitoring.vm_monitor_cli:main" -fedcloud-site-sla = "fedcloud_vm_monitoring.site_sla_cli:main" +fedcloud-sla-monitor = "fedcloud_vm_monitoring.sla_monitor_cli:main" [tool.poetry.dependencies] From ce2c4431b34c57ebfd6a8d692f056fb49c0a4433 Mon Sep 17 00:00:00 2001 From: Enol Fernandez Date: Fri, 29 Nov 2024 16:54:31 +0000 Subject: [PATCH 13/13] add command --- README.md | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/README.md b/README.md index
5aa2032..5f531c2 100644 --- a/README.md +++ b/README.md @@ -211,7 +211,23 @@ retrieved from the cloud-info-provider and reports any deviations. - An IGTF certificate to query GOCDB SLA lists +### Running the monitor + + +```shell +$ fedcloud-sla-monitor --help +Usage: fedcloud-sla-monitor [OPTIONS] + +Options: + --site TEXT Site to check + --user-cert TEXT User certificate (for GOCDB queries) [required] + --vo-map-file TEXT SLA-VO mapping file + --help Show this message and exit. +``` + ## Useful links - [OpenStack API](https://docs.openstack.org/api-ref/) - [OpenStack API examples](https://docs.openstack.org/keystone/pike/api_curl_examples.html) + +