Setting up AWS Fargate on ECS

  • This mostly focuses on using Fargate for one-off jobs. Configuration is in Terraform.
  • Set up a VPC for your cluster if you need to. Fargate tasks need subnets that can reach ECR to pull images: public subnets with public IPs assigned, or private subnets with a NAT gateway or VPC endpoints.
  • ECS Cluster
    • You need to create an ECS cluster, but it doesn't need any configuration beyond a name.
    • resource "aws_ecs_cluster" "pipeline" {
        name = "pipeline"
      }
      
  • Running on ARM
    • Add "runtimePlatform": { "cpuArchitecture": "ARM64" } to your task definition.
  • Roles
    • A task can have a task role and an execution role.
    • The task role is what your application code inside the containers runs as; give it the permissions your job needs.
    • The execution role is used by ECS itself to start your containers: pulling the image from ECR and setting up log delivery.
    • Both of these need an "assume role policy" that lets the ecs-tasks.amazonaws.com service principal assume them.
    • Terraform for the roles. This also sets up extra S3 permissions on the task execution role in case you are sending your logs to S3 (see below).
      • data "aws_iam_policy_document" "ecs_assume_role" {
          statement {
            actions = ["sts:AssumeRole"]
            effect = "Allow"
            principals {
              type = "Service"
              identifiers = ["ecs-tasks.amazonaws.com"]
            }
        
            condition {
              test = "ArnLike"
              variable = "aws:SourceArn"
              values = [
                format("arn:aws:ecs:%s:%s:*", var.aws_region, var.aws_account_id)
              ]
            }
        
            condition {
              test = "StringEquals"
              variable = "aws:SourceAccount"
              values = [
                var.aws_account_id
              ]
            }
        
          }
        }
        
        resource "aws_iam_role" "my_task_execution_role" {
          name = "fargate_my_task_execution"
        
          assume_role_policy = data.aws_iam_policy_document.ecs_assume_role.json
        
          inline_policy {
            name = "s3_put"
            policy = jsonencode({
              "Version": "2012-10-17",
              "Statement": [        
                {
                  "Effect": "Allow",
                  "Action": [
                     "s3:*"
                  ],
                  "Resource": [
                    "arn:aws:s3:::my-app-logs",
                    "arn:aws:s3:::my-app-logs/*"
                   ]
                },
                {
                  "Effect": "Allow",
                  "Action": [
                    "ecr:GetAuthorizationToken",
                    "logs:CreateLogGroup",
                    "logs:CreateLogStream",
                    "logs:PutLogEvents"
                  ],
                  "Resource": "*"
                },
                {
                  "Effect": "Allow",
                  "Action": [
                    "ecr:BatchCheckLayerAvailability",
                    "ecr:BatchGetImage",
                    "ecr:DescribeImages",
                    "ecr:DescribeRepositories",
                    "ecr:GetDownloadUrlForLayer",
                    "ecr:GetRepositoryPolicy",
                    "ecr:Images"
                  ],
                  "Resource": [
                    format("arn:aws:ecr:%s:%s:repository/*", var.aws_region, var.aws_account_id)
                  ]
                }
              ]
            })
          }
        }
        
        resource "aws_iam_role" "my_task_role" {
          name = "fargate_my_task"
        
          assume_role_policy = data.aws_iam_policy_document.ecs_assume_role.json
        
          inline_policy {
            name = "s3_put"
            policy = jsonencode({
              "Version": "2012-10-17",
              "Statement": [{
                "Effect": "Allow",
                "Action": [
                  "s3:*"
                ],
                "Resource": "*"
              }]
            })
          }
        }
        
        output "my_task_role_arn" {
          value = aws_iam_role.my_task_role.arn
        }
        
        output "my_task_execution_role_arn" {
          value = aws_iam_role.my_task_execution_role.arn
        }
        
        
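    • Note that inline_policy is deprecated in newer AWS provider releases; standalone aws_iam_role_policy resources attached to the role do the same job.
    • If you don't need the S3 log shipping below, you can skip the hand-rolled ECR and logs statements and attach the AWS-managed execution role policy instead. A sketch, reusing the role above:
      • # Covers the standard execution role needs: ECR pulls and
        # CloudWatch Logs writes.
        resource "aws_iam_role_policy_attachment" "my_task_execution_managed" {
          role       = aws_iam_role.my_task_execution_role.name
          policy_arn = "arn:aws:iam::aws:policy/service-role/AmazonECSTaskExecutionRolePolicy"
        }
        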
  • Sending Logs to S3
    • CloudWatch is the AWS-recommended way to ship logs out, but you can also send them to S3.
    • First you'll need roles like the ones in the previous section to give S3 permissions.
    • Then create your S3 bucket.
      • This configuration also autodeletes the files after 90 days.
      • resource "aws_s3_bucket" "my_app_logs" {
          bucket = "my-app-logs"
        }
        
        resource "aws_s3_bucket_lifecycle_configuration" "my_app_logs" {
          bucket = aws_s3_bucket.my_app_logs.id
          rule {
            id = "delete-old-logs"
            # An empty filter applies the rule to every object in the bucket;
            # the S3 API requires each rule to have a filter or prefix.
            filter {}
            expiration {
              days = 90
            }
            status = "Enabled"
          }
        }
        
        
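      • Since it's a log bucket, it's also worth blocking public access (standard boilerplate):
      • resource "aws_s3_bucket_public_access_block" "my_app_logs" {
          bucket                  = aws_s3_bucket.my_app_logs.id
          block_public_acls       = true
          block_public_policy     = true
          ignore_public_acls      = true
          restrict_public_buckets = true
        }
        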
    • Then set up your task definition like so.
    • {
        "family": "my_app",
        "taskRoleArn": "ARN of the task role above",
        "executionRoleArn": "ARN of execution role above",
        "requiresCompatibilities": ["FARGATE"],
        // Fargate requires awsvpc networking and task-level cpu/memory.
        "networkMode": "awsvpc",
        "cpu": "1024",
        "memory": "2048",
        "containerDefinitions": [
          // Only needed for custom log routing.
          {
            "name": "log_router",
            "essential": true,
            "image": "amazon/aws-for-fluent-bit:stable",
            "firelensConfiguration": {
              "type": "fluentbit"
            },
            "logConfiguration": {
              "logDriver": "awslogs",
              "options": {
                "awslogs-group": "firelens-container",
                "awslogs-region": "us-west-2",
                "awslogs-create-group": "true",
                "awslogs-stream-prefix": "firelens"
              }
            },
            "memoryReservation": 50
          },
          {
            "name": "my_app",
            "image": "acctnum.dkr.ecr.region.amazonaws.com/image:tag",
            "portMappings": [],
            "essential": true,
            "logConfiguration": {
              "logDriver": "awsfirelens",
              "options": {
                "Name": "s3",
                // Optional, to customize key format which can be useful for batch jobs
                "s3_key_format":
                  "/%Y/%m/%Y-%m-%d-my-app-$TAG-%S",
                "region": "your-aws-region",
                "bucket": "my-app-logs",
                // Rotate to a new log file when this size is reached.
                // For persistent services this should be much smaller.
                // For batch jobs I set it large since it's nice to have
                // all the logs for a run in one file.
                "total_file_size": "10G",
                "upload_timeout": "1m",
                "retry_limit": "2"
              }
            }
          }
        ]
      }
      
    • The logConfiguration options are documented at https://docs.fluentbit.io/manual/pipeline/outputs/s3
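    • Since the rest of this configuration is in Terraform, you can register the task definition there too. A sketch with placeholder sizes; the top-level fields like the role ARNs become resource arguments, and container_definitions takes just the containerDefinitions array:
      • resource "aws_ecs_task_definition" "my_app" {
          family                   = "my_app"
          requires_compatibilities = ["FARGATE"]
          network_mode             = "awsvpc" # Fargate only supports awsvpc
          cpu                      = 1024
          memory                   = 2048
          task_role_arn            = aws_iam_role.my_task_role.arn
          execution_role_arn       = aws_iam_role.my_task_execution_role.arn
        
          # Hypothetical file containing just the containerDefinitions
          # array from the JSON above.
          container_definitions = file("${path.module}/containers.json")
        }
        
    • One-off runs can then be started with aws ecs run-task, passing --launch-type FARGATE and a --network-configuration that names your subnets and security group.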

Thanks for reading! If you have any questions or comments, please send me a note on Twitter.