add alarms for failed lambdas

This commit is contained in:
Will Freeman
2025-07-21 12:46:44 -06:00
parent ff5602b897
commit 97a50f48e5
6 changed files with 73 additions and 7 deletions

View File

@@ -3,15 +3,27 @@ provider "aws" {
}
# Instantiates the module found at ./modules/alpr_counts.
# Every argument is assigned exactly once: HCL rejects a block that sets the
# same argument more than once.
module "alpr_counts" {
  module_name          = "alpr_counts"
  source               = "./modules/alpr_counts"
  deflock_stats_bucket = var.deflock_stats_bucket
  rate                 = "rate(60 minutes)"
  # ARN of the SNS topic the module's Lambda alarm publishes to.
  sns_topic_arn = aws_sns_topic.lambda_alarms.arn
}
# Instantiates the module found at ./modules/alpr_clusters.
# Every argument is assigned exactly once: HCL rejects a block that sets the
# same argument more than once.
module "alpr_clusters" {
  module_name        = "alpr_clusters"
  source             = "./modules/alpr_clusters"
  deflock_cdn_bucket = var.deflock_cdn_bucket
  # Rate at which the module runs its Lambda function.
  rate = "rate(1 hour)"
  # ARN of the SNS topic the module's Lambda alarm publishes to.
  sns_topic_arn = aws_sns_topic.lambda_alarms.arn
}
# SNS topic whose ARN is passed to the alpr_counts and alpr_clusters modules
# as their sns_topic_arn input.
resource "aws_sns_topic" "lambda_alarms" {
  name = "lambda_alarms_topic"
}
# Subscribes the phone number in var.alarm_phone_number to the lambda_alarms
# topic using the SMS protocol.
resource "aws_sns_topic_subscription" "sms_subscription" {
  protocol  = "sms"
  endpoint  = var.alarm_phone_number
  topic_arn = aws_sns_topic.lambda_alarms.arn
}

View File

@@ -89,3 +89,23 @@ resource "aws_cloudwatch_log_group" "lambda_log_group" {
name = "/aws/lambda/${aws_lambda_function.overpass_lambda.function_name}"
retention_in_days = 14
}
# SNS topic named after the module ("<module_name>_lambda_alarms").
# NOTE(review): the error alarm in this file sends its actions to
# var.sns_topic_arn rather than to this topic — confirm whether this topic is
# still referenced anywhere before keeping it.
resource "aws_sns_topic" "lambda_alarms" {
  name = "${var.module_name}_lambda_alarms"
}
# CloudWatch alarm on the Lambda "Errors" metric for overpass_lambda.
# It enters ALARM when the summed error count over a single one-day period is
# greater than zero, and then notifies the topic given in var.sns_topic_arn.
resource "aws_cloudwatch_metric_alarm" "lambda_error_alarm" {
  alarm_name        = "${var.module_name}_execution_error"
  alarm_description = "An error has occurred while executing the ${var.module_name} Lambda"

  # Condition: Sum(Errors) > 0 across one evaluation period.
  comparison_operator = "GreaterThanThreshold"
  threshold           = 0
  evaluation_periods  = 1
  period              = 86400 # 1 day
  statistic           = "Sum"

  # Metric being watched, scoped to this module's function.
  namespace   = "AWS/Lambda"
  metric_name = "Errors"
  dimensions = {
    FunctionName = aws_lambda_function.overpass_lambda.function_name
  }

  # Where the notification is sent when the alarm fires.
  alarm_actions = [var.sns_topic_arn]
}

View File

@@ -9,3 +9,8 @@ variable "deflock_cdn_bucket" {
# How often the Lambda function runs. Declared as a string so the input is
# type-checked like this module's sns_topic_arn variable.
variable "rate" {
  description = "Rate at which to run the Lambda function"
  type        = string
}
# Required input (no default): the SNS topic that the Lambda error alarm
# uses for its alarm actions.
variable "sns_topic_arn" {
  type        = string
  description = "The ARN of the SNS topic for Lambda alarms"
}

View File

@@ -120,3 +120,22 @@ resource "aws_iam_role_policy_attachment" "lambda_cloudwatch_logs_attachment" {
role = aws_iam_role.lambda_role.name
policy_arn = aws_iam_policy.lambda_cloudwatch_logs_policy.arn
}
# Alarms for Failure
#
# CloudWatch alarm on the Lambda "Errors" metric for overpass_lambda.
# It enters ALARM when the summed error count over a single one-day period is
# greater than zero, and then notifies the topic given in var.sns_topic_arn.
resource "aws_cloudwatch_metric_alarm" "lambda_error_alarm" {
  alarm_name        = "${var.module_name}_execution_error"
  alarm_description = "An error has occurred while executing the ${var.module_name} Lambda"

  # Metric being watched, scoped to this module's function.
  namespace   = "AWS/Lambda"
  metric_name = "Errors"
  dimensions = {
    FunctionName = aws_lambda_function.overpass_lambda.function_name
  }

  # Condition: Sum(Errors) > 0 across one evaluation period.
  statistic           = "Sum"
  period              = 86400 # 1 day
  evaluation_periods  = 1
  threshold           = 0
  comparison_operator = "GreaterThanThreshold"

  # Where the notification is sent when the alarm fires.
  alarm_actions = [var.sns_topic_arn]
}

View File

@@ -26,3 +26,8 @@ variable "aws_account_id" {
type = string
default = "912821578123"
}
# Required input (no default): the SNS topic that the Lambda error alarm
# uses for its alarm actions.
variable "sns_topic_arn" {
  type        = string
  description = "The ARN of the SNS topic for Lambda alarms"
}

View File

@@ -7,3 +7,8 @@ variable "deflock_cdn_bucket" {
description = "S3 bucket for the CDN"
default = "cdn.deflock.me"
}
# Phone number used as the endpoint for alarm notifications.
# There is deliberately no default: this file is checked into git, so the
# value has to be supplied from outside the repository when Terraform runs.
variable "alarm_phone_number" {
  description = "Phone number to receive alarm notifications"
  type        = string
  # intentionally left blank since this file is checked into git
}