From 6add13b14c80b3adfc46d67c01ef32537b6fa4dc Mon Sep 17 00:00:00 2001 From: "andrew@technative.eu" Date: Fri, 30 Aug 2024 11:06:46 +0200 Subject: [PATCH] Updated Readme with a known error. Updated Lambda alarm creator to be able to loop through priorities instead of only running once --- README.md | 8 +++ alarm_creator/actions.py | 118 +++++++++++++++++++-------------------- 2 files changed, 67 insertions(+), 59 deletions(-) diff --git a/README.md b/README.md index bee6280..3da711c 100644 --- a/README.md +++ b/README.md @@ -71,6 +71,14 @@ module "observability_sender" { } ``` +## Put exceeded error when running alarm creator Lambda + +On the first run you might hit a put exceeded error because you are trying to create too many alarms at once. + +You need to rerun the Lambda alarm creator a few times, possibly with a shorter list. This keeps you from reaching the maximum threshold set by AWS. + +You also need to clean up the SQS queue in the observability hub account, as the error might linger in the SQS queue even though the problem is resolved. 
+ ## Providers diff --git a/alarm_creator/actions.py b/alarm_creator/actions.py index c5e873b..38dcdd9 100644 --- a/alarm_creator/actions.py +++ b/alarm_creator/actions.py @@ -37,10 +37,10 @@ def AWS_Alarms(): instances = GetRunningDBInstances() elif service == "CWAgent": instances = GetRunningInstances() - # elif service == "ECS": - # instances = GetRunningClusters() - # elif service == "ElastiCache": - # instances = GetRunningCacheClusters() + elif service == "ECS": + instances = GetRunningClusters() + elif service == "ElastiCache": + instances = GetRunningCacheClusters() for alarm in alarms[service]: # Query the namespaces in CloudWatch Metrics @@ -59,63 +59,63 @@ def AWS_Alarms(): else: cw_threshold = int(threshold) - # Handling dimensions - for instance in instances: - - instanceDimensions = { - "Name": f"{alarms[service][alarm]['Dimensions']}", - "Value": instance - } - - #Add any additional disk-related dimensions if present - if 'ExtraDimensions' in alarms[service][alarm]: - dimensionlist.extend(alarms[service][alarm]['ExtraDimensions']) - - for dimension in dimensionlist: - if dimension["Name"] == "path" and dimension["Value"] == "/": - # Query the namespaces in CloudWatch Metrics - # Find the correct device dimension for the root volume - response_2 = CWclient.list_metrics(Namespace=f"{alarms[service][alarm]['Namespace']}", RecentlyActive='PT3H', - Dimensions=[instanceDimensions, {'Name': 'path', 'Value': '/'}] + # Handling dimensions + for instance in instances: + + instanceDimensions = { + "Name": f"{alarms[service][alarm]['Dimensions']}", + "Value": instance + } + + #Add any additional disk-related dimensions if present + if 'ExtraDimensions' in alarms[service][alarm]: + dimensionlist.extend(alarms[service][alarm]['ExtraDimensions']) + + for dimension in dimensionlist: + if dimension["Name"] == "path" and dimension["Value"] == "/": + # Query the namespaces in CloudWatch Metrics + # Find the correct device dimension for the root volume + response_2 = 
CWclient.list_metrics(Namespace=f"{alarms[service][alarm]['Namespace']}", RecentlyActive='PT3H', + Dimensions=[instanceDimensions, {'Name': 'path', 'Value': '/'}] + ) + + for metrics in response_2["Metrics"]: + for dimension in metrics["Dimensions"]: + if dimension['Name'] == "device": + + dimensionlist = [ + instanceDimensions, + { + "Name": "device", + "Value": f"{dimension['Value']}" + } + ] + dimensionlist.extend(alarms[service][alarm]['ExtraDimensions']) + else: + continue + else: + #Clean up dimensionlist if not extra dimensions are present and only add the instance dimension + dimensionlist = [] + dimensionlist = [instanceDimensions] + + + # Create the alarms + CWclient.put_metric_alarm( + AlarmName=f"{instance}-{alarm} {alarms[service][alarm]['Description']['Operatorsymbol']} {threshold} {alarms[service][alarm]['Description']['ThresholdUnit']}", + ComparisonOperator=alarms[service][alarm]['ComparisonOperator'], + EvaluationPeriods=alarms[service][alarm]['EvaluationPeriods'], + MetricName=alarms[service][alarm]['MetricName'], + Namespace=alarms[service][alarm]['Namespace'], + Period=alarms[service][alarm]['Period'], + Statistic=alarms[service][alarm]['Statistic'], + Threshold=cw_threshold, + ActionsEnabled=True, + TreatMissingData=alarms[service][alarm]['TreatMissingData'], + AlarmDescription=f"{priority}", + Dimensions=dimensionlist, + Tags=[{"Key": "CreatedbyLambda", "Value": "True"}], ) - for metrics in response_2["Metrics"]: - for dimension in metrics["Dimensions"]: - if dimension['Name'] == "device": - - dimensionlist = [ - instanceDimensions, - { - "Name": "device", - "Value": f"{dimension['Value']}" - } - ] - dimensionlist.extend(alarms[service][alarm]['ExtraDimensions']) - else: - continue - else: - #Clean up dimensionlist if not extra dimensions are present and only add the instance dimension - dimensionlist = [] - dimensionlist = [instanceDimensions] - - - # Create the alarms - CWclient.put_metric_alarm( - AlarmName=f"{instance}-{alarm} 
{alarms[service][alarm]['Description']['Operatorsymbol']} {threshold} {alarms[service][alarm]['Description']['ThresholdUnit']}", - ComparisonOperator=alarms[service][alarm]['ComparisonOperator'], - EvaluationPeriods=alarms[service][alarm]['EvaluationPeriods'], - MetricName=alarms[service][alarm]['MetricName'], - Namespace=alarms[service][alarm]['Namespace'], - Period=alarms[service][alarm]['Period'], - Statistic=alarms[service][alarm]['Statistic'], - Threshold=cw_threshold, - ActionsEnabled=True, - TreatMissingData=alarms[service][alarm]['TreatMissingData'], - AlarmDescription=f"{priority}", - Dimensions=dimensionlist, - Tags=[{"Key": "CreatedbyLambda", "Value": "True"}], - ) - def GetRunningInstances():