使用Terraform创建具有目标组的AWS ECS始终超时

时间:2018-02-23 16:32:38

标签: amazon-web-services terraform amazon-ecs

Terraform版本

v0.11.3

受影响的资源

  • aws_ecs_service
  • aws_ecs_task_definition
  • aws_alb
  • aws_alb_target_group
  • aws_alb_listener

错误

我正在设置一个目前只有一项服务的ECS集群。在让服务正常启动的过程中遇到过几个问题,现在服务可以创建了,但似乎无法让容器保持运行。

A

相关?

一旦我的资源启动,我似乎无法在任何实例或vpc网关上找到公共DNS链接

Service phoenix-web target group is unhealthy

我的ECS服务模块的

main.tf:

service phoenix-web (instance i-079707fc669361a81) (port 80) is unhealthy in target-group tgqaphoenix-web due to (reason Request timed out)

我的ECS集群模块的main.tf

# Reads config/ecs-task.json and exposes its rendered contents so they can be
# passed to the task definition as container definitions.
data "template_file" "ecs_task_definition_config" {
  template = "${file("config/ecs-task.json")}"
}

# Task definition registered under the "nginx-phoenix-task" family. Its
# container definitions are the rendered template from the data source above.
resource "aws_ecs_task_definition" "phoenix-web" {
  container_definitions = "${data.template_file.ecs_task_definition_config.rendered}"
  family                = "nginx-phoenix-task"

  # When a replacement is required, create the new task definition before
  # destroying the old one.
  lifecycle {
    create_before_destroy = true
  }
}

# Security group used by the load balancer (aws_alb.main references it).
# It declares no inline rules; rules are attached through separate
# aws_security_group_rule resources.
resource "aws_security_group" "main" {
  tags {
    Environment = "${var.environment}"
    Project     = "${var.name}"
    Name        = "sg${var.name}LoadBalancer"
  }

  vpc_id = "${var.vpc_id}"
}

# Allows inbound TCP port 80 (HTTP) to the load balancer security group from
# any IPv4 address. The resource name says "https" although the rule opens
# port 80; the name is kept so existing state and references stay valid.
resource "aws_security_group_rule" "app_lb_https_ingress" {
  type        = "ingress"
  from_port   = 80
  to_port     = 80
  protocol    = "tcp"
  cidr_blocks = ["0.0.0.0/0"]

  security_group_id = "${aws_security_group.main.id}"
}

# Allows all outbound traffic from the load balancer security group.
# Terraform removes the allow-all egress rule that AWS adds to a new VPC
# security group, so without this rule the load balancer cannot open
# connections to its targets and every health check ends in
# "Request timed out". A standalone rule is used because inline egress blocks
# cannot be combined with aws_security_group_rule resources on the same group.
resource "aws_security_group_rule" "app_lb_all_egress" {
  type        = "egress"
  from_port   = 0
  to_port     = 0
  protocol    = "-1"
  cidr_blocks = ["0.0.0.0/0"]

  security_group_id = "${aws_security_group.main.id}"
}

# Application load balancer placed in the public subnets and guarded by
# aws_security_group.main. Access logs are written to the configured bucket
# and prefix.
resource "aws_alb" "main" {
  name            = "alb-${var.environment}-${var.name}"
  subnets         = ["${var.public_subnet_ids}"]
  security_groups = ["${aws_security_group.main.id}"]

  access_logs {
    prefix = "${var.access_log_prefix}"
    bucket = "${var.access_log_bucket}"
  }

  tags {
    Environment = "${var.environment}"
    Project     = "${var.name}"
    Name        = "alb-${var.environment}-${var.name}"
  }
}

# HTTP target group on port 80 in the given VPC.
resource "aws_alb_target_group" "main" {
  name     = "tg${var.environment}${var.name}"
  vpc_id   = "${var.vpc_id}"
  protocol = "HTTP"
  port     = 80

  # Probe GET /healthz over HTTP every 30s with a 3s timeout; 3 consecutive
  # successes mark a target healthy, 2 consecutive failures mark it unhealthy.
  health_check {
    path                = "/healthz"
    protocol            = "HTTP"
    interval            = 30
    timeout             = 3
    healthy_threshold   = 3
    unhealthy_threshold = 2
  }

  tags {
    Environment = "${var.environment}"
    Project     = "${var.name}"
    Name        = "tg${var.environment}${var.name}"
  }

  # Explicit ordering: create the load balancer before this target group.
  depends_on = ["aws_alb.main"]
}

# Listener on aws_alb.main that forwards HTTP traffic on port 80 to the main
# target group. The resource is named "https" but it listens on plain HTTP.
resource "aws_alb_listener" "https" {
  protocol          = "HTTP"
  port              = 80
  load_balancer_arn = "${aws_alb.main.id}"

  default_action {
    type             = "forward"
    target_group_arn = "${aws_alb_target_group.main.id}"
  }
}

# ECS service that runs the phoenix-web task definition in the cluster named
# after the environment and registers the "phoenix-web" container (port 80)
# with the main target group.
resource "aws_ecs_service" "service" {
  lifecycle {
    create_before_destroy = true
  }

  name    = "${var.name}"
  cluster = "${var.environment}"

  # Use the full ARN, which includes the revision. The resource id is only the
  # family name, so referencing it never changes when a new revision is
  # registered and the service would not be redeployed.
  task_definition = "${aws_ecs_task_definition.phoenix-web.arn}"

  desired_count                      = "${var.desired_count}"
  deployment_minimum_healthy_percent = "${var.deployment_min_healthy_percent}"
  deployment_maximum_percent         = "${var.deployment_max_percent}"
  iam_role                           = "${aws_iam_role.ecs-role.id}"

  load_balancer {
    target_group_arn = "${aws_alb_target_group.main.id}"
    container_name   = "phoenix-web"
    container_port   = "80"
  }

  depends_on = [
    "aws_iam_role.ecs-role",

    # The role's policy must exist before the service is created and must
    # outlive it on destroy; otherwise the service can get stuck draining.
    "aws_iam_role_policy.ecs-policy",

    # The target group has to be attached to a load balancer (through the
    # listener) before ECS accepts it for a service.
    "aws_alb_listener.https",

    "null_resource.alb_exists",
  ]
}

# Inline policy attached to the "ecs-role" IAM role. On all resources ("*") it
# allows:
#   - ECS container-instance / agent actions (register, deregister, poll,
#     telemetry, Submit*, CreateCluster)
#   - ECR image pull actions (auth token, layer availability/download, image)
#   - EC2 Describe* and AuthorizeSecurityGroupIngress
#   - ELB Describe* plus register/deregister of instances and targets
# NOTE(review): the same role is passed as role_arn to
# aws_appautoscaling_target.main, but this policy grants no ecs:UpdateService
# or ecs:DescribeServices — confirm whether autoscaling relies on this role.
resource "aws_iam_role_policy" "ecs-policy" {
  name = "ecs-policy"
  role = "${aws_iam_role.ecs-role.id}"
  policy = <<EOF
{
  "Version": "2012-10-17",
  "Statement": [
    {
      "Effect": "Allow",
      "Action": [
        "ecs:CreateCluster",
        "ecs:DeregisterContainerInstance",
        "ecs:DiscoverPollEndpoint",
        "ecs:Poll",
        "ecs:RegisterContainerInstance",
        "ecs:StartTelemetrySession",
        "ecs:Submit*",
        "ecr:GetAuthorizationToken",
        "ecr:BatchCheckLayerAvailability",
        "ecr:GetDownloadUrlForLayer",
        "ecr:BatchGetImage",
        "ec2:AuthorizeSecurityGroupIngress",
        "ec2:Describe*",
        "elasticloadbalancing:DeregisterInstancesFromLoadBalancer",
        "elasticloadbalancing:Describe*",
        "elasticloadbalancing:RegisterInstancesWithLoadBalancer",
        "elasticloadbalancing:RegisterTargets",
        "elasticloadbalancing:DeregisterTargets"
      ],
      "Resource": "*"
    }
  ]
}
EOF

  # The role is already referenced through the `role` attribute above; this
  # makes the ordering explicit as well.
  depends_on = ["aws_iam_role.ecs-role"]
}

# IAM role named "ecs-role". Its trust policy lets the ECS service principal
# (ecs.amazonaws.com) assume it. In this file the role is used as the iam_role
# of aws_ecs_service.service and as role_arn of aws_appautoscaling_target.main.
# NOTE(review): only ecs.amazonaws.com is trusted — confirm whether the
# autoscaling target needs application-autoscaling.amazonaws.com as well.
resource "aws_iam_role" "ecs-role" {
  name = "ecs-role"
  assume_role_policy = <<EOF
{
  "Version": "2012-10-17",
  "Statement": [
    {
      "Action": "sts:AssumeRole",
      "Principal": {
        "Service": "ecs.amazonaws.com"
      },
      "Effect": "Allow",
      "Sid": ""
    }
  ]
}
EOF

}

# Registers the ECS service's desired task count as a scalable target bounded
# by var.min_count and var.max_count.
resource "aws_appautoscaling_target" "main" {
  # Identifies the service as service/<cluster>/<service name>.
  resource_id        = "service/${var.environment}/${var.name}"
  scalable_dimension = "ecs:service:DesiredCount"
  service_namespace  = "ecs"

  max_capacity = "${var.max_count}"
  min_capacity = "${var.min_count}"
  role_arn     = "${aws_iam_role.ecs-role.arn}"

  # resource_id is built from variables, not from the service resource, so the
  # ordering has to be declared explicitly.
  depends_on = ["aws_ecs_service.service"]
}

# Placeholder resource whose trigger is the target group id, so it is
# re-created whenever that id changes. aws_ecs_service.service lists it in
# depends_on to order itself after the target group.
# NOTE(review): despite the name "alb_exists" and the trigger key "alb_name",
# the value tracked here is the target group id, not the load balancer.
resource "null_resource" "alb_exists" {
  triggers {
    alb_name = "${aws_alb_target_group.main.id}"
  }
}

2 个答案:

答案 0 :(得分:0)

应用程序的运行状况检查(即 /healthz)似乎失败了。您可以从以下几个方面开始排查:

1)在本地启动一个容器,检查它是否正常工作。根据上面的健康检查配置,您应该能够通过类似 http://someip:port/healthz 的地址访问应用。 2)如果可以访问,那么在构建docker镜像时是否暴露了端口80?请检查Dockerfile。 3)如果以上两个步骤都没有问题,那么在任务运行后,尝试使用ECS实例的IP访问您的应用:http://ecsinstanceip:port/healthz。 4)如果第3步也可行,那么尝试增加健康检查的超时时间,以便应用程序有更多时间通过健康检查。

答案 1 :(得分:0)

线索1

确保ECS容器实例的安全组允许来自VPC内部、目标端口为1024-65535的入站流量(不要向外部网络开放)

线索2

在任务定义的portMappings中,按如下方式指定:

  "portMappings": [
    {
      "hostPort": 0,
      "protocol": "tcp",
      "containerPort": 80 
    }
  ],

请注意此处: containerPort是容器对外暴露(expose)的端口,也就是您的应用监听并响应运行状况检查的端口;hostPort是在主机上绑定、用于转发到容器的端口。将hostPort保留为0,ECS会自动分配主机端口,这就是为什么需要在安全组(SG)上开放1024-65535的原因。这样做是必要的,因为它使您能够在同一实例上多次运行同一个任务定义(水平扩展)。