运行状况检查失败-Terraform-ECS群集-动态端口映射运行状况检查在2个端口中的1个上失败?

时间:2019-06-08 01:56:44

标签: amazon-web-services terraform amazon-ecs aws-alb

我使用Terraform进行了环境设置。我们的SecOps团队需要安装SonarQube CE,以进行自动扫描、代码异味和漏洞检查。因此,我让它在AWS上运行:通过我们的VPN,DNS解析为面向内部的ALB,该ALB将流量指向构成ECS群集的目标实例组。还有一个启动配置和自动缩放组。当前每台主机仅运行一个容器,希望将其扩展为2-4个。

我遇到的问题是实例正在容器端口9000和动态临时端口32768上同时注册到目标组。对动态端口的运行状况检查正常,但是对端口9000的运行状况检查失败了。这导致实例在初始,不正常运行和反复终止之间循环。除了这个烦人的问题,该应用程序还可以正常运行。通过RDS连接,我们可以很好地使用SonarQube。

我尝试在Terraform中删除对容器端口的引用。我还要指出,这是一个非常安全的环境:来自任何VPC的所有出口流量都要经过McAfee云代理设备过滤。当我第一次在沙盒帐户中搭建该环境(出口放行到0.0.0.0/0)时,一切正常。现在我已经花了几个小时,正挠着头。

希望其他人到过这里并分享他们的见解。明天毕竟是新的一天。帮助!

ERROR Message when I remove the port from the target group
aws_lb_target_group.ecs: port should be set when target type is instance
ERROR Message when I set the port to 0

aws_ecs_service.ecs: InvalidParameterException: The container sonarqube did not have a container port 0 defined.
ERROR Message when I set the container port to 0 in the taskdef.

aws_ecs_task_definition.task: ClientException: Invalid 'containerPort' setting for container 'sonarqube'
ecs-taskdef.tf

resource "aws_ecs_task_definition" "task" {
  family             = "${var.name}-${var.env}"
  # bridge networking: with hostPort 0 the host port is picked dynamically
  # from the ephemeral range (32768-65535); that dynamic port is what the
  # ECS service registers with the ALB target group.
  network_mode       = "bridge"
  cpu                = 8192
  memory             = 16384
  execution_role_arn = "${var.ecs-exec-role}"

  # Single SonarQube container; logs to CloudWatch, JDBC config via env vars.
  container_definitions = <<DEFINITION
[
    {
        "name": "${var.name}",
        "image":"${var.image}",
        "logConfiguration": {
            "logDriver": "awslogs",
            "options": {
                "awslogs-group": "/ecs/${var.cluster_name}-${var.name}",
                "awslogs-region": "${var.region}",
                "awslogs-stream-prefix": "ecs"
            }
        },
        "portMappings": [
            {
                "containerPort": 9000,
                "hostPort": 0
            }
        ],
        "environment": [
            {
            "name": "sonar.jdbc.password",
            "value": "${var.password}"
            },
            {
            "name": "sonar.jdbc.url",
            "value": "jdbc:mysql://${var.rds_url}:${var.port}/sonar?useUnicode=true&characterEncoding=utf8&rewriteBatchedStatements=true&useConfigs=maxPerformance"
            },
            {
            "name": "sonar.jdbc.username",
            "value": "${var.username}"
            }
        ]
    }
]
DEFINITION
}

# ECS service: one SonarQube task per container instance (DAEMON strategy).
resource "aws_ecs_service" "ecs" {
  name                = "${var.name}-${var.env}"
  cluster             = "${var.cluster_name}"
  task_definition     = "${aws_ecs_task_definition.task.arn}"
  # DAEMON schedules exactly one task on every container instance in the
  # cluster; desired_count is managed by ECS, hence the ignore below.
  scheduling_strategy = "DAEMON"

  lifecycle {
    ignore_changes = ["desired_count"]
  }

  # The service registers each task with the target group on the dynamic
  # host port assigned at launch. container_port refers to the
  # container-side port from the task definition's portMappings and is
  # required here — removing it is a Terraform/ECS validation error.
  load_balancer {
    target_group_arn = "${aws_lb_target_group.ecs.arn}"
    container_name   = "${var.name}"
    container_port   = 9000 # container-side port; must match a portMapping
  }
}



elb.tf

# Internal application load balancer fronting the ECS cluster
# (reached over the corporate VPN via internal DNS).
resource "aws_lb" "ecs" {
  name               = "${var.name_prefix}-${var.name}-tf"
  internal           = true
  load_balancer_type = "application"

  security_groups            = ["${var.security_groups}"]
  subnets                    = ["${var.subnets}"]
  enable_deletion_protection = false

  tags = "${merge(var.tags, map("Name", "${var.name_prefix}-${var.name}-elb"))}"
}

# HTTP listener: permanently redirects all plain-HTTP traffic to HTTPS.
resource "aws_lb_listener" "ecs" {
  load_balancer_arn = "${aws_lb.ecs.arn}"
  port              = 80
  protocol          = "HTTP"

  default_action {
    type = "redirect"
    redirect {
      port        = "443"
      protocol    = "HTTPS"
      status_code = "HTTP_301" # permanent redirect
    }
  }
}

# HTTPS listener: terminates TLS at the ALB and forwards to the ECS
# target group.
resource "aws_lb_listener" "ssl" {
  load_balancer_arn = "${aws_lb.ecs.arn}"
  port              = 443
  protocol          = "HTTPS"

  lifecycle {
    create_before_destroy = true
  }

  ssl_policy = "ELBSecurityPolicy-2016-08"

  certificate_arn = "arn:aws:acm:REDACTED"

  default_action {
    type             = "forward"
    target_group_arn = "${aws_lb_target_group.ecs.arn}"
  }
}

# Target group for the ECS service. target_type defaults to "instance",
# which is why Terraform requires a port here; with bridge-mode dynamic
# port mapping the ECS service overrides this port per-target at
# registration time, so 9000 is only a placeholder.
# NOTE(review): no explicit health_check block — ALB defaults apply
# (path "/", traffic-port); confirm those suit SonarQube.
resource "aws_lb_target_group" "ecs" {
  name     = "${var.cluster_name}"
  protocol = "HTTP"

  port   = 9000 #must be here or TF errors instance type must have port
  vpc_id = "${var.vpc_id}"

  lifecycle {
    create_before_destroy = true
  }
}

ec2.tf

# Auto Scaling group for the ECS container instances.
resource "aws_autoscaling_group" "asg" {
  availability_zones        = ["${var.region}a", "${var.region}b", "${var.region}d"]
  name                      = "${var.name}-${var.env}-asg"
  # BUG FIX: max_size/min_size previously read the swapped variables
  # (max_size = var.min_size and vice versa).
  max_size                  = "${var.max_size}"
  min_size                  = "${var.min_size}"
  health_check_grace_period = 300
  health_check_type         = "ELB"
  desired_capacity          = "${var.desired_size}"
  launch_configuration      = "${aws_launch_configuration.alc.name}"
  vpc_zone_identifier       = ["${var.subnet_ids}"]

  # FIX: target_group_arns removed. Attaching the target group at the ASG
  # level registered every instance on the group's static port (9000) in
  # addition to the dynamic ephemeral-port registration performed by the
  # ECS service — producing duplicate targets and the failing port-9000
  # health checks that cycled instances through terminate/relaunch. With
  # ECS dynamic port mapping, only the ECS service should manage target
  # registration.

  lifecycle {
    create_before_destroy = true
  }

  tag {
    key                 = "Environment"
    value               = "${var.name}"
    propagate_at_launch = true
  }

  tag {
    key                 = "Name"
    value               = "${var.name_prefix}-${var.name}.ecs"
    propagate_at_launch = true
  }
}

# Launch configuration for the ECS container instances; user_data joins
# each instance to the cluster.
resource "aws_launch_configuration" "alc" {
  name_prefix          = "${var.name_prefix}.ecs"
  image_id             = "${lookup(var.ecs-images, var.region)}"
  instance_type        = "${var.instance_type}"
  # FIX: the provider expects the instance profile *name* here, not the
  # ARN; passing the ARN can be rejected by the EC2 API.
  iam_instance_profile = "${aws_iam_instance_profile.ecs-instance-profile.name}"
  user_data            = "${data.template_file.userdata.rendered}"
  key_name             = "${var.key_name}"

  security_groups = ["${var.security_groups}"]

  # name_prefix + create_before_destroy lets Terraform roll out a new
  # launch configuration before deleting the old one.
  lifecycle {
    create_before_destroy = true
  }

  root_block_device {
    volume_type = "io1"
    iops        = "1000"
    volume_size = "${var.volume_size}"
  }
}

# Renders the instance bootstrap script, injecting the ECS cluster name
# so instances register with the correct cluster on boot.
data "template_file" "userdata" {
  template = "${file("${path.module}/userdata/ecs-instances.sh")}"

  vars {
    cluster-name = "${aws_ecs_cluster.cluster.name}"
  }
}

# Security group for the cluster: all traffic within the VPC CIDR,
# 80/443 from the corporate network, and the ephemeral host-port range
# (used by ECS dynamic port mapping) back out to corp users.
resource "aws_security_group" "allow_all_from_cluster" {
  name        = "${var.name_prefix}-${var.name}-ecs-cluster"
  description = "Allow traffic from cluster"
  vpc_id      = "${var.vpc_id}"
  tags        = "${merge(var.tags, map("Name", "${var.name_prefix}-${var.name}-sg"))}"

  lifecycle {
    create_before_destroy = true
  }

  ingress { #open to VPC IP's
    from_port   = 0
    to_port     = 0
    protocol    = "-1"
    cidr_blocks = ["172.27.160.0/22"]
  }

  ingress { #open to corp network, redirected to 443 by the HTTP listener
    from_port   = 80
    to_port     = 80
    protocol    = "tcp"
    cidr_blocks = ["10.0.0.0/8"]
  }

  ingress { #https access for corp users
    from_port   = 443
    to_port     = 443
    protocol    = "tcp"
    cidr_blocks = ["10.0.0.0/8"]
  }

  egress { #open to VPC IP's
    from_port   = 0
    to_port     = 0
    protocol    = "-1"
    cidr_blocks = ["172.27.160.0/22"]
  }

  egress { #ephemeral-port (32768-65535) responses to corp users
    from_port   = 32768
    to_port     = 65535
    protocol    = "tcp"
    cidr_blocks = ["10.0.0.0/8"]
  }
}
iam.tf

# IAM role shared by the ECS service and the EC2 container instances
# (the trust policy allows both service principals to assume it).
resource "aws_iam_role" "iam_role" {
  name = "${var.name}-ecs-role"

  assume_role_policy = <<EOF
{
  "Version": "2012-10-17",
  "Statement": [
    {
      "Sid": "",
      "Effect": "Allow",
      "Principal": {
        "Service": "ecs.amazonaws.com"
      },
      "Action": "sts:AssumeRole"
    },
    {
      "Sid": "",
      "Effect": "Allow",
      "Principal": {
        "Service": "ec2.amazonaws.com"
      },
      "Action": "sts:AssumeRole"
    }
  ]
}


EOF
}

# Policy granting the cluster access to EFS.
# NOTE(review): elasticfilesystem:* on Resource "*" is very broad —
# consider scoping to the specific file system ARN and the actions
# actually needed (e.g. ClientMount/ClientWrite).
resource "aws_iam_policy" "efs-policy" {
  name        = "${var.env}-efs-access-policy"
  path        = "/"
  description = "Allow ${var.env} cluster access to EFS"

  policy = <<EOF
{
  "Version": "2012-10-17",
  "Statement": [
    {
      "Action": [
        "elasticfilesystem:*"
      ],
      "Effect": "Allow",
      "Resource": "*"
    }
  ]
}
EOF
}

# Managed-policy attachments for the shared role, plus the instance
# profile handed to the launch configuration.

# ECS service permissions (register/deregister targets with the ELB, etc.)
resource "aws_iam_role_policy_attachment" "ecs-service-role" {
  role       = "${aws_iam_role.iam_role.name}"
  policy_arn = "arn:aws:iam::aws:policy/service-role/AmazonEC2ContainerServiceRole"
}

# Container-instance permissions (register with cluster, pull images, logs)
resource "aws_iam_role_policy_attachment" "ecs-service-for-ec2-role" {
  role       = "${aws_iam_role.iam_role.name}"
  policy_arn = "arn:aws:iam::aws:policy/service-role/AmazonEC2ContainerServiceforEC2Role"
}

# Allows SSM (Session Manager / Run Command) management of the instances
resource "aws_iam_role_policy_attachment" "ssm-service-role" {
  role       = "${aws_iam_role.iam_role.name}"
  policy_arn = "arn:aws:iam::aws:policy/service-role/AmazonEC2RoleforSSM"
}

# Custom EFS access policy defined above
resource "aws_iam_role_policy_attachment" "efs-for-ec2-role" {
  role       = "${aws_iam_role.iam_role.name}"
  policy_arn = "${aws_iam_policy.efs-policy.arn}"
}

# Instance profile wrapping the role for use by EC2 instances
resource "aws_iam_instance_profile" "ecs-instance-profile" {
  name = "${var.env}-ecs"
  role = "${aws_iam_role.iam_role.name}"
}

预期的行为是运行状况检查仅在动态端口上进行。目前每个实例在目标组的"已注册的目标"部分中出现两次(两个端口各一次);如果我手动删除端口9000的那条注册,实例仍然可用。

1 个答案:

答案 0 :(得分:0)

如果您使用临时(动态)端口,目标组实际上并不关心您在containerPort中指定的值。在我的Terraform中,将端口9000定义为containerPort只是因为它需要一个值,同时将hostPort指定为0(表示动态分配)。安全组已放行临时端口范围,因此运行状况检查和流量端口均可正常工作。