Error creating EKS node-group with Terraform
While I am trying to deploy EKS via Terraform, I am facing an error with node-group creation. I am getting the following error:
Error: error waiting for EKS Node Group (Self-Hosted-Runner:Self-Hosted-Runner-default-node-group) to create:
unexpected state 'CREATE_FAILED', wanted target 'ACTIVE'.
last error: 1 error occurred:i-04db15f25be4212fb, i-07bd88adabaa103c0, i-0915982ac0f217fe4:
NodeCreationFailure: Instances failed to join the kubernetes cluster.
with module.eks.aws_eks_node_group.eks-node-group,
│ on ../../modules/aws/eks/eks-node-group.tf line 1, in resource "aws_eks_node_group" "eks-node-group":
│ 1: resource "aws_eks_node_group" "eks-node-group" {
EKS
# EKS Cluster Resources
# EKS control plane. Name, Kubernetes version, subnets, security group and
# the enabled log types are all supplied through input variables.
resource "aws_eks_cluster" "eks" {
  name     = var.cluster-name
  role_arn = aws_iam_role.cluster.arn
  version  = var.k8s-version

  # Control-plane log types to enable for this cluster.
  enabled_cluster_log_types = var.eks-cw-logging

  vpc_config {
    subnet_ids         = var.private_subnets
    security_group_ids = [var.security_group]
  }

  # Create the cluster only after both policies are attached to its role.
  depends_on = [
    aws_iam_role_policy_attachment.cluster-AmazonEKSClusterPolicy,
    aws_iam_role_policy_attachment.cluster-AmazonEKSServicePolicy,
  ]
}
EKS-NODE-GROUP
# Default managed node group (SPOT capacity) for the EKS cluster, placed in
# the private subnets and sized from the desired/min/max capacity variables.
resource "aws_eks_node_group" "eks-node-group" {
  # Take the name from the cluster resource (it is set from var.cluster-name
  # there) so the dependency on the cluster is part of the resource graph.
  cluster_name    = aws_eks_cluster.eks.name
  node_group_name = "${var.cluster-name}-default-node-group"

  # node_group_name_prefix conflicts with node_group_name, so it stays unset.
  node_group_name_prefix = null

  node_role_arn  = aws_iam_role.node.arn
  subnet_ids     = var.private_subnets
  capacity_type  = "SPOT"
  instance_types = [var.node-instance-type]

  scaling_config {
    desired_size = var.desired-capacity
    max_size     = var.max-size
    min_size     = var.min-size
  }

  update_config {
    # At most one node may be unavailable during a node group update.
    max_unavailable = 1
  }

  # Ensure that IAM Role permissions are created before and deleted after EKS Node Group handling.
  # Otherwise, EKS will not be able to properly delete EC2 Instances and Elastic Network Interfaces.
  # All three node policies are listed, including the ECR read-only policy,
  # so that every attachment exists before the instances are launched.
  depends_on = [
    aws_eks_cluster.eks,
    aws_iam_role_policy_attachment.node-AmazonEKSWorkerNodePolicy,
    aws_iam_role_policy_attachment.node-AmazonEKS_CNI_Policy,
    aws_iam_role_policy_attachment.node-AmazonEC2ContainerRegistryReadOnly,
  ]

  tags = {
    Name = "${var.cluster-name}-default-node-group"
  }
}
IAM
# IAM
# CLUSTER
# IAM role for the EKS cluster, named "<cluster-name>-eks-cluster-role".
# Its trust policy allows the eks.amazonaws.com service principal to call
# sts:AssumeRole. This role is referenced as role_arn by aws_eks_cluster.eks.
resource "aws_iam_role" "cluster" {
name = "${var.cluster-name}-eks-cluster-role"
# The heredoc body is passed verbatim as the trust policy JSON document.
assume_role_policy = <<POLICY
{
"Version": "2012-10-17",
"Statement": [
{
"Effect": "Allow",
"Principal": {
"Service": "eks.amazonaws.com"
},
"Action": "sts:AssumeRole"
}
]
}
POLICY
}
# Attaches the AWS-managed AmazonEKSClusterPolicy to the cluster role.
resource "aws_iam_role_policy_attachment" "cluster-AmazonEKSClusterPolicy" {
  role       = aws_iam_role.cluster.name
  policy_arn = "arn:aws:iam::aws:policy/AmazonEKSClusterPolicy"
}
# Attaches the AWS-managed AmazonEKSServicePolicy to the cluster role.
resource "aws_iam_role_policy_attachment" "cluster-AmazonEKSServicePolicy" {
  role       = aws_iam_role.cluster.name
  policy_arn = "arn:aws:iam::aws:policy/AmazonEKSServicePolicy"
}
# NODES
# IAM role for the worker nodes, named "<cluster-name>-eks-node-role".
# Its trust policy allows the ec2.amazonaws.com service principal to call
# sts:AssumeRole. This role is referenced as node_role_arn by the node group.
resource "aws_iam_role" "node" {
name = "${var.cluster-name}-eks-node-role"
# The heredoc body is passed verbatim as the trust policy JSON document.
assume_role_policy = <<POLICY
{
"Version": "2012-10-17",
"Statement": [
{
"Effect": "Allow",
"Principal": {
"Service": "ec2.amazonaws.com"
},
"Action": "sts:AssumeRole"
}
]
}
POLICY
}
# Attaches the AWS-managed AmazonEKSWorkerNodePolicy to the node role.
resource "aws_iam_role_policy_attachment" "node-AmazonEKSWorkerNodePolicy" {
  role       = aws_iam_role.node.name
  policy_arn = "arn:aws:iam::aws:policy/AmazonEKSWorkerNodePolicy"
}
# Attaches the AWS-managed AmazonEKS_CNI_Policy to the node role.
resource "aws_iam_role_policy_attachment" "node-AmazonEKS_CNI_Policy" {
  role       = aws_iam_role.node.name
  policy_arn = "arn:aws:iam::aws:policy/AmazonEKS_CNI_Policy"
}
# Attaches the AWS-managed AmazonEC2ContainerRegistryReadOnly policy to the
# node role.
resource "aws_iam_role_policy_attachment" "node-AmazonEC2ContainerRegistryReadOnly" {
  role       = aws_iam_role.node.name
  policy_arn = "arn:aws:iam::aws:policy/AmazonEC2ContainerRegistryReadOnly"
}
# Instance profile wrapping the node role.
# NOTE(review): nothing in the visible configuration references this
# profile — confirm whether it is still needed.
resource "aws_iam_instance_profile" "node" {
  role = aws_iam_role.node.name
  name = "${var.cluster-name}-eks-node-instance-profile"
}
Security Group
# Create Security Group
# Security group in vpc1: allows inbound TCP and ICMP from the allowed
# address and from the VPC CIDR, and all outbound traffic.
resource "aws_security_group" "cluster" {
  name        = "terraform_cluster"
  description = "AWS security group for terraform"
  vpc_id      = aws_vpc.vpc1.id

  # Inbound: every TCP port from the allowed address and from inside the VPC.
  ingress {
    from_port   = 1
    to_port     = 65535 # highest valid port (was mistyped as 65365, leaving 65366-65535 closed)
    protocol    = "tcp"
    cidr_blocks = [var.address_allowed, var.vpc1_cidr_block]
  }

  # Inbound: ICMP (ping) from the same sources; -1 means all ICMP types/codes.
  ingress {
    from_port   = -1
    to_port     = -1
    protocol    = "icmp"
    cidr_blocks = [var.address_allowed, var.vpc1_cidr_block]
  }

  # Outbound: unrestricted.
  egress {
    from_port   = 0             # any port
    to_port     = 0             # any port
    protocol    = "-1"          # any protocol
    cidr_blocks = ["0.0.0.0/0"] # any destination
  }

  # var.tags is merged last, so a Name key in var.tags overrides this one.
  tags = merge(
    {
      Name = "onboarding-sg",
    },
    var.tags,
  )
}
VPC
# Create VPC
# VPC that hosts the subnets and the security group; DNS resolution and DNS
# hostnames are both enabled.
resource "aws_vpc" "vpc1" {
  cidr_block       = var.vpc1_cidr_block
  instance_tenancy = "default"

  enable_dns_hostnames = true
  enable_dns_support   = true

  # var.tags is merged last, so a Name key in var.tags overrides this one.
  tags = merge(
    {
      Name = "onboarding-vpc",
    },
    var.tags,
  )
}
# Subnet Public
# Public subnet in the first available availability zone. Only the first
# entry of var.subnet_public1_cidr_block is used.
resource "aws_subnet" "subnet_public1" {
  vpc_id            = aws_vpc.vpc1.id
  cidr_block        = var.subnet_public1_cidr_block[0]
  availability_zone = data.aws_availability_zones.available.names[0]

  # Instances launched here are assigned a public IP address.
  map_public_ip_on_launch = true

  # The kubernetes.io/role/elb tag marks the subnet for external load balancers.
  tags = merge(
    {
      Name                     = "onboarding-public-sub",
      "kubernetes.io/role/elb" = "1"
    },
    var.tags,
  )
}
# Subnet Private
# Private subnets: one per CIDR in var.subnet_private1_cidr_block. Each
# instance is keyed by its CIDR, and the CIDR's list index selects the
# availability zone.
resource "aws_subnet" "subnet_private1" {
  for_each = { for idx, cidr_block in var.subnet_private1_cidr_block : cidr_block => idx }

  vpc_id            = aws_vpc.vpc1.id
  cidr_block        = each.key
  availability_zone = data.aws_availability_zones.available.names[each.value]

  # Instances launched here are NOT assigned a public IP address, which is
  # what keeps these subnets private.
  map_public_ip_on_launch = false

  # Tags for internal load balancers and for cluster ownership of the subnet.
  tags = merge(
    {
      Name                                        = "onboarding-private-sub",
      "kubernetes.io/role/internal-elb"           = "1",
      "kubernetes.io/cluster/${var.cluster-name}" = "owned"
    },
    var.tags,
  )
}
tfvars
# General vars
region = "eu-west-1"

# Bucket vars
bucket             = "tf-state"
tag_name           = "test"
tag_environment    = "Dev"
acl                = "private"
versioning_enabled = "Enabled"

# Network EKS vars
aws_public_key_path = "~/.ssh/id_rsa.pub"
aws_key_name        = "aws-k8s"
# NOTE(review): "/32" on its own is not a valid CIDR; presumably the address
# part was removed before posting — confirm the real value is "<ip>/32".
address_allowed            = "/32" # Office public IP Address
vpc1_cidr_block            = "10.0.0.0/16"
subnet_public1_cidr_block  = ["10.0.128.0/20", "10.0.144.0/20", "10.0.160.0/20"]
subnet_private1_cidr_block = ["10.0.0.0/19", "10.0.32.0/19", "10.0.64.0/19"]

# Common tags merged into the tags of the VPC, subnets and security group.
tags = {
  Scost       = "testing",
  Terraform   = "true",
  Environment = "testing"
}

# EKS
cluster-name       = "Self-Hosted-Runner"
k8s-version        = "1.21"
node-instance-type = "t3.medium"
desired-capacity   = "3"
max-size           = "7"
min-size           = "1"
# db-subnet-cidr = ["10.0.192.0/21", "10.0.200.0/21", "10.0.208.0/21"]
eks-cw-logging     = ["api", "audit", "authenticator", "controllerManager", "scheduler"]
ec2-key-public-key = ""
"issues" : [ {
"code" : "NodeCreationFailure",
"message" : "Instances failed to join the kubernetes cluster",
What do you think I have misconfigured?
Sources
This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.
Source: Stack Overflow
| Solution | Source |
|---|---|
