我已经为 AWS ECS 的横向扩展(scale out)定义了 CloudWatch 警报。
通常工作正常,但有时会失败,并显示以下错误。500 是横向扩展的阈值。指标周期(period)为每 5 分钟一次。触发横向扩展的数据点设置为"2 个中的 1 个"(即 10 分钟内只要有一个值超过阈值就会触发):
"错误":"未找到适用于度量值 [437.08774491907025, 516.9558339660845] 和突破阈值 500.0 的步进调整"
步进调整的定义如下:
# Single scaling step. The bound is expressed as an offset from the alarm
# threshold, so this step covers metric values from the threshold upward
# (no upper bound is set).
step_adjustment {
  scaling_adjustment          = 1
  metric_interval_lower_bound = 0
}
警报配置:
# Alarm evaluation: goes into ALARM when at least 1 of the 2 most recent
# evaluation periods has a datapoint breaching the threshold.
threshold           = "500"
evaluation_periods  = "2"
datapoints_to_alarm = "1"
用于创建警报的Terraform代码
# Scale-out step scaling policy for the sqs_to_kinesis ECS service:
# each invocation raises the service's DesiredCount by one task.
resource "aws_appautoscaling_policy" "task_count_up" {
  name               = "appScalingPolicy_${aws_ecs_service.sqs_to_kinesis.name}_ScaleUp"
  service_namespace  = "ecs"
  scalable_dimension = "ecs:service:DesiredCount"
  resource_id        = "service/${aws_ecs_cluster.shared-elb-access-logs-processor.name}/${aws_ecs_service.sqs_to_kinesis.name}"

  step_scaling_policy_configuration {
    # Adjust capacity by a fixed number of tasks (not a percentage or an exact count).
    adjustment_type         = "ChangeInCapacity"
    metric_aggregation_type = "Maximum"
    cooldown                = "${var.scale_up_cooldown_seconds}"

    # Only one step is defined, starting at the alarm threshold (offset 0)
    # with no upper bound.
    # NOTE(review): the reported error lists a datapoint (437.08...) below the
    # 500 threshold, for which no step exists here. Confirm whether the alarm's
    # 1-of-2 datapoint setting can invoke this policy with a sub-threshold
    # value, and whether a step for the negative interval is needed.
    step_adjustment {
      scaling_adjustment          = 1
      metric_interval_lower_bound = 0
    }
  }

  # Explicit ordering: create this policy only after the scalable target.
  depends_on = ["aws_appautoscaling_target.main"]
}
# Scale-in step scaling policy for the sqs_to_kinesis ECS service:
# each invocation lowers the service's DesiredCount by one task.
resource "aws_appautoscaling_policy" "task_count_down" {
  name               = "appScalingPolicy_${aws_ecs_service.sqs_to_kinesis.name}_ScaleDown"
  service_namespace  = "ecs"
  scalable_dimension = "ecs:service:DesiredCount"
  resource_id        = "service/${aws_ecs_cluster.shared-elb-access-logs-processor.name}/${aws_ecs_service.sqs_to_kinesis.name}"

  step_scaling_policy_configuration {
    # Adjust capacity by a fixed number of tasks (not a percentage or an exact count).
    adjustment_type         = "ChangeInCapacity"
    metric_aggregation_type = "Minimum"
    cooldown                = "${var.scale_down_cooldown_seconds}"

    # Only one step is defined, ending at the alarm threshold (offset 0)
    # with no lower bound; it removes a single task.
    step_adjustment {
      scaling_adjustment          = -1
      metric_interval_upper_bound = 0
    }
  }

  # Explicit ordering: create this policy only after the scalable target.
  depends_on = ["aws_appautoscaling_target.main"]
}