Secure kubernetes cluster with KOPS

Introduction

KOPS is one of the many tools available for spinning up a Kubernetes cluster. I’ve been using it successfully for years now.

Cluster spec

Below is the cluster spec I’m using (applies to kubernetes 1.16).

You will get a cluster with:

  • the ability to use {{ .KMS_KEY_ID }} for encrypting EBS volumes (PV/PVC) - you’ll need to configure a StorageClass
  • node-local DNS cache (although you have to deploy the DaemonSet yourself)
  • audit logs enabled
  • iptables configured to not track port 53 traffic, both UDP and TCP (the famous DNS races)
  • aws-iam-authenticator enabled - see docs here and here
  • selinux enabled
  • nodes registering with the cluster using bootstrap tokens / node authorizer (you need to deploy this yourself - docs)
  • etcd encrypted at rest, see this post
  • your cluster will pass CIS benchmark
---
# kops Cluster manifest template (API group kops.k8s.io/v1alpha2).
# Values written in double curly braces are Go-template style placeholders;
# presumably the file is rendered before being handed to kops - confirm the
# rendering step used in your pipeline.
apiVersion: kops.k8s.io/v1alpha2
kind: Cluster
metadata:
  name: {{ .KOPS_CLUSTER_NAME }}
spec:
  # Extra IAM policy statements, given as JSON text per instance role.
  additionalPolicies:
    # Masters: allow route53:ListHostedZonesByName on all resources, plus
    # grant/encrypt/decrypt style operations on the single KMS key identified
    # by the AWS_REGION, AWS_ACCOUNT_NUMBER and KMS_KEY_ID placeholders.
    master: |
      [
        {
          "Sid": "kopsK8sRoute53ListZonesByName",
          "Effect": "Allow",
          "Action": [
              "route53:ListHostedZonesByName"
          ],
          "Resource": [
              "*"
          ]
        },
        {
          "Sid": "kopsK8sKMSEncrypted",
          "Effect": "Allow",
          "Action": [
            "kms:CreateGrant",
            "kms:Decrypt",
            "kms:DescribeKey",
            "kms:Encrypt",
            "kms:GenerateDataKey*",
            "kms:ReEncrypt*"
          ],
          "Resource": [
            "arn:aws:kms:{{ .AWS_REGION }}:{{ .AWS_ACCOUNT_NUMBER }}:key/{{ .KMS_KEY_ID }}"
          ]
        }
      ]
    # Nodes: the same KMS statement as the masters, without the Route 53 one.
    node: |
      [
        {
          "Sid": "kopsK8sKMSEncrypted",
          "Effect": "Allow",
          "Action": [
            "kms:CreateGrant",
            "kms:Decrypt",
            "kms:DescribeKey",
            "kms:Encrypt",
            "kms:GenerateDataKey*",
            "kms:ReEncrypt*"
          ],
          "Resource": [
            "arn:aws:kms:{{ .AWS_REGION }}:{{ .AWS_ACCOUNT_NUMBER }}:key/{{ .KMS_KEY_ID }}"
          ]
        }
      ]
  # Kubernetes API endpoint: a public load balancer with cross-zone balancing,
  # a 600 second idle timeout and one extra security group attached.
  api:
    loadBalancer:
      type: Public
      crossZoneLoadBalancing: true
      idleTimeoutSeconds: 600
      additionalSecurityGroups:
      - "{{ .ADDITIONAL_AWS_API_SG }}"
  # Cloud, release channel and location of the kops state for this cluster.
  cloudProvider: aws
  channel: stable
  configBase: "s3://{{ .KOPS_S3_BUCKET }}-{{ .AWS_REGION }}/{{ .KOPS_CLUSTER_NAME }}"
  # AWS-based authentication and RBAC authorization, both with default options.
  authentication:
    aws: {}
  authorization:
    rbac: {}
  # NOTE(review): presumably this is what backs the "encrypted at rest" item in
  # the feature list above - confirm the encryption config is provisioned.
  encryptionConfig: true
  # Two etcd clusters, "main" and "events". Each has three members (a, b, c),
  # one per master instance group, all on encrypted volumes.
  etcdClusters:
  - name: main
    version: "3.3.17"
    cpuRequest: 200m
    memoryRequest: 100Mi
    etcdMembers:
    - name: a
      instanceGroup: "master-{{ .AWS_REGION }}a"
      encryptedVolume: true
    - name: b
      instanceGroup: "master-{{ .AWS_REGION }}b"
      encryptedVolume: true
    - name: c
      instanceGroup: "master-{{ .AWS_REGION }}c"
      encryptedVolume: true
  - name: events
    version: "3.3.17"
    cpuRequest: 100m
    memoryRequest: 100Mi
    etcdMembers:
    - name: a
      instanceGroup: "master-{{ .AWS_REGION }}a"
      encryptedVolume: true
    - name: b
      instanceGroup: "master-{{ .AWS_REGION }}b"
      encryptedVolume: true
    - name: c
      instanceGroup: "master-{{ .AWS_REGION }}c"
      encryptedVolume: true
  fileAssets:
  # Audit policy (audit.k8s.io/v1) written to /srv/kubernetes/audit.yaml on
  # Master instances; the same path is set as kubeAPIServer.auditPolicyFile.
  # The rule list opens with several `level: None` exclusions for high-volume
  # system traffic, logs node/pod status updates and read verbs at Request
  # level, keeps secrets, configmaps and tokenreviews at Metadata level, uses
  # RequestResponse for the listed API groups, and ends with an unconditional
  # Metadata rule. Most non-None rules omit the RequestReceived stage.
  - content: |
      ---
      apiVersion: audit.k8s.io/v1
      kind: Policy
      rules:
        - level: None
          users: ["system:kube-proxy"]
          verbs: ["watch"]
          resources:
            - group: ""
              resources: ["endpoints", "services", "services/status"]
        - level: None
          users: ["system:unsecured"]
          namespaces: ["kube-system"]
          verbs: ["get"]
          resources:
            - group: ""
              resources: ["configmaps"]
        - level: None
          users: ["kubelet"]
          verbs: ["get"]
          resources:
            - group: ""
              resources: ["nodes", "nodes/status"]
        - level: None
          userGroups: ["system:nodes"]
          verbs: ["get"]
          resources:
            - group: ""
              resources: ["nodes", "nodes/status"]
        - level: None
          users:
            - system:kube-controller-manager
            - system:kube-scheduler
            - system:serviceaccount:kube-system:endpoint-controller
          verbs: ["get", "update"]
          namespaces: ["kube-system"]
          resources:
            - group: ""
              resources: ["endpoints"]
        - level: None
          users: ["system:apiserver"]
          verbs: ["get"]
          resources:
            - group: ""
              resources: ["namespaces", "namespaces/status", "namespaces/finalize"]
        - level: None
          users:
            - system:kube-controller-manager
          verbs: ["get", "list"]
          resources:
            - group: "metrics.k8s.io"
        - level: None
          nonResourceURLs:
            - /healthz*
            - /version
            - /swagger*
        - level: None
          resources:
            - group: ""
              resources: ["events"]
        - level: Request
          users: ["kubelet", "system:node-problem-detector", "system:serviceaccount:kube-system:node-problem-detector"]
          verbs: ["update","patch"]
          resources:
            - group: ""
              resources: ["nodes/status", "pods/status"]
          omitStages:
            - "RequestReceived"
        - level: Request
          userGroups: ["system:nodes"]
          verbs: ["update","patch"]
          resources:
            - group: ""
              resources: ["nodes/status", "pods/status"]
          omitStages:
            - "RequestReceived"
        - level: Request
          users: ["system:serviceaccount:kube-system:namespace-controller"]
          verbs: ["deletecollection"]
          omitStages:
            - "RequestReceived"
        - level: Metadata
          resources:
            - group: ""
              resources: ["secrets", "configmaps"]
            - group: authentication.k8s.io
              resources: ["tokenreviews"]
          omitStages:
            - "RequestReceived"
        - level: Request
          verbs: ["get", "list", "watch"]
          resources:
            - group: ""
            - group: "admissionregistration.k8s.io"
            - group: "apiextensions.k8s.io"
            - group: "apiregistration.k8s.io"
            - group: "apps"
            - group: "authentication.k8s.io"
            - group: "authorization.k8s.io"
            - group: "autoscaling"
            - group: "batch"
            - group: "certificates.k8s.io"
            - group: "extensions"
            - group: "metrics.k8s.io"
            - group: "networking.k8s.io"
            - group: "policy"
            - group: "rbac.authorization.k8s.io"
            - group: "scheduling.k8s.io"
            - group: "settings.k8s.io"
            - group: "storage.k8s.io"
          omitStages:
            - "RequestReceived"
        - level: RequestResponse
          resources:
            - group: ""
            - group: "admissionregistration.k8s.io"
            - group: "apiextensions.k8s.io"
            - group: "apiregistration.k8s.io"
            - group: "apps"
            - group: "authentication.k8s.io"
            - group: "authorization.k8s.io"
            - group: "autoscaling"
            - group: "batch"
            - group: "certificates.k8s.io"
            - group: "extensions"
            - group: "metrics.k8s.io"
            - group: "networking.k8s.io"
            - group: "policy"
            - group: "rbac.authorization.k8s.io"
            - group: "scheduling.k8s.io"
            - group: "settings.k8s.io"
            - group: "storage.k8s.io"
          omitStages:
            - "RequestReceived"
        - level: Metadata
          omitStages:
            - "RequestReceived"
    name: kubernetes-audit
    path: /srv/kubernetes/audit.yaml
    roles:
    - Master
  # Bash script written to /opt/bin/dns-conntrack-iptables on Node instances.
  # It inserts, at position 1, raw-table NOTRACK rules and filter-table ACCEPT
  # rules for UDP and TCP port 53 traffic addressed to, or sent from,
  # ${PRIVATE_EC2_IPV4}. Because of `set -u` the script aborts when
  # PRIVATE_EC2_IPV4 is not present in its environment, so that variable has
  # to be exported to the script before it runs.
  - content: |
      #!/usr/bin/env bash

      set -euo pipefail

      /usr/sbin/iptables -I PREROUTING 1 -t raw -p udp -d "${PRIVATE_EC2_IPV4}" --dport 53 -j NOTRACK
      /usr/sbin/iptables -I PREROUTING 1 -t raw -p tcp -d "${PRIVATE_EC2_IPV4}" --dport 53 -j NOTRACK
      /usr/sbin/iptables -I OUTPUT 1 -t raw -p udp -s "${PRIVATE_EC2_IPV4}" --sport 53 -j NOTRACK
      /usr/sbin/iptables -I OUTPUT 1 -t raw -p tcp -s "${PRIVATE_EC2_IPV4}" --sport 53 -j NOTRACK
      /usr/sbin/iptables -I INPUT 1 -t filter -p udp -d "${PRIVATE_EC2_IPV4}" --dport 53 -j ACCEPT
      /usr/sbin/iptables -I INPUT 1 -t filter -p tcp -d "${PRIVATE_EC2_IPV4}" --dport 53 -j ACCEPT
      /usr/sbin/iptables -I OUTPUT 1 -t filter -p udp -s "${PRIVATE_EC2_IPV4}" --sport 53 -j ACCEPT
      /usr/sbin/iptables -I OUTPUT 1 -t filter -p tcp -s "${PRIVATE_EC2_IPV4}" --sport 53 -j ACCEPT
    name: dns-conntrack-iptables
    path: /opt/bin/dns-conntrack-iptables
    roles:
    - Node
  hooks:
  # Node-only unit that reads the instance's private IPv4 address from the EC2
  # metadata endpoint and stores it in the systemd manager environment as
  # PRIVATE_EC2_IPV4 (via `systemctl set-environment`).
  #
  # Type=oneshot is required here: with the default service type systemd
  # considers the unit started as soon as the process is spawned, so units
  # ordered After= this one could run before the variable has been set.
  # With oneshot, dependants wait until the command has exited successfully;
  # RemainAfterExit=yes then keeps the unit reported as active.
  - manifest: |
      [Unit]
      After=network.target
      Description=Set PRIVATE_EC2_IPV4 env

      [Service]
      Type=oneshot
      ExecStart=/usr/bin/bash -euo pipefail -c "/usr/bin/systemctl set-environment PRIVATE_EC2_IPV4=$(/usr/bin/curl --silent --fail http://169.254.169.254/latest/meta-data/local-ipv4)"
      RemainAfterExit=yes

      [Install]
      WantedBy=multi-user.target
    name: private-ipv4.service
    roles:
    - Node
  # Node-only oneshot unit that runs the /opt/bin/dns-conntrack-iptables script
  # after (and requiring) private-ipv4.service.
  #
  # The script is started through /usr/bin/bash rather than a bare `sh`:
  #   * the script declares a bash shebang and uses `set -o pipefail`, which a
  #     plain POSIX sh is not guaranteed to support;
  #   * an absolute executable path in ExecStart works on every systemd
  #     release, whereas a bare command name is rejected by older ones.
  # /usr/bin/bash is the same interpreter path private-ipv4.service uses.
  - manifest: |
      [Unit]
      After=private-ipv4.service

      [Service]
      Type=oneshot
      ExecStart=/usr/bin/bash /opt/bin/dns-conntrack-iptables

      [Install]
      WantedBy=multi-user.target
    name: dns-conntrack-iptables.service
    requires:
    - private-ipv4.service
    roles:
    - Node
  # Oneshot unit, ordered before kubelet.service, that creates
  # /srv/kubernetes/aws-iam-authenticator and copies the authenticator config
  # into it from the cluster's S3 state path (addons/authenticator) using the
  # AWS CLI from a container image pinned by sha256 digest.
  #
  # The image reference had been mangled into an e-mail-obfuscation
  # placeholder; it is restored to the awscli image and digest form.
  # NOTE(review): no `roles` are set, so this hook is not restricted to
  # masters - confirm whether it should carry `roles: [Master]`.
  - before:
    - kubelet.service
    manifest: |
      [Unit]
      Description=Download AWS Authenticator configs from S3
      [Service]
      Type=oneshot
      ExecStart=/bin/mkdir -p /srv/kubernetes/aws-iam-authenticator
      ExecStart=/usr/bin/docker run --net=host --rm -v /srv/kubernetes/aws-iam-authenticator:/srv/kubernetes/aws-iam-authenticator quay.io/coreos/awscli@sha256:7b893bfb22ac582587798b011024f40871cd7424b9026595fd99c2b69492791d aws s3 cp --recursive s3://{{ .KOPS_S3_BUCKET }}-{{ .AWS_REGION }}/{{ .KOPS_CLUSTER_NAME }}/addons/authenticator /srv/kubernetes/aws-iam-authenticator/
    name: aws-authenticator
  # Container hook that runs `setenforce 1` chrooted into /rootfs from a
  # busybox image (presumably /rootfs is the host root filesystem - confirm).
  - name: enable-selinux
    execContainer:
      image: busybox
      command: ["sh", "-c", "chroot /rootfs setenforce 1"]
  # IAM settings: legacy mode off, container registry access allowed.
  iam:
    legacy: false
    allowContainerRegistry: true
  kubeAPIServer:
    # Audit logging; the policy file path matches the kubernetes-audit file
    # asset (/srv/kubernetes/audit.yaml).
    auditLogMaxAge: 30
    auditLogMaxBackups: 10
    auditLogMaxSize: 100
    auditLogPath: /var/log/kube-apiserver-audit.log
    auditPolicyFile: /srv/kubernetes/audit.yaml
    # Token webhook config lives in the directory populated by the
    # aws-authenticator hook.
    authenticationTokenWebhookConfigFile: /srv/kubernetes/aws-iam-authenticator/kubeconfig.yaml
    authorizationMode: Node,RBAC
    disableBasicAuth: true
    # Each admission plugin is listed exactly once (NodeRestriction used to
    # appear twice in this list).
    enableAdmissionPlugins:
    - NamespaceLifecycle
    - NodeRestriction
    - LimitRanger
    - ServiceAccount
    - PersistentVolumeLabel
    - DefaultStorageClass
    - DefaultTolerationSeconds
    - MutatingAdmissionWebhook
    - ValidatingAdmissionWebhook
    - ResourceQuota
    - AlwaysPullImages
    - DenyEscalatingExec
    - PodSecurityPolicy
    enableBootstrapTokenAuth: true
    enableProfiling: false
    logLevel: 1
    runtimeConfig:
      autoscaling/v2beta1: "true"
    # TLS 1.2 minimum with an explicit cipher suite list (same list as the
    # kubelet section).
    tlsCipherSuites:
    - TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256
    - TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256
    - TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305
    - TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384
    - TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305
    - TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384
    - TLS_RSA_WITH_AES_256_GCM_SHA384
    - TLS_RSA_WITH_AES_128_GCM_SHA256
    tlsMinVersion: VersionTLS12
  # Controller manager: profiling off, low verbosity, and tokencleaner listed
  # in addition to the "*" wildcard entry.
  kubeControllerManager:
    logLevel: 1
    enableProfiling: false
    controllers: ["*", "tokencleaner"]
    horizontalPodAutoscalerSyncPeriod: 15s
    horizontalPodAutoscalerUseRestClients: true
    terminatedPodGCThreshold: 10
  # Cluster DNS is provided by CoreDNS.
  kubeDNS:
    provider: CoreDNS
  # kube-proxy and the scheduler both run at log level 1; scheduler profiling
  # is switched off.
  kubeProxy:
    logLevel: 1
  kubeScheduler:
    enableProfiling: false
    logLevel: 1
  kubelet:
    logLevel: 1
    # Authentication / authorization hardening: no anonymous access, webhook
    # token authentication and authorization, read-only port disabled.
    anonymousAuth: false
    authenticationTokenWebhook: true
    authorizationMode: Webhook
    readOnlyPort: 0
    protectKernelDefaults: true
    # Link-local resolver address; presumably the node-local DNS cache
    # mentioned in the feature list - confirm the DaemonSet listens here.
    clusterDNS: "169.254.20.10"
    # Resource reservation: kubelet and runtime share the /kube-reserved
    # cgroup, which is also the enforced kube-reserved cgroup.
    enforceNodeAllocatable: pods,kube-reserved
    kubeReserved:
      cpu: 100m
      memory: 200Mi
      ephemeral-storage: 1Gi
    kubeReservedCgroup: /kube-reserved
    kubeletCgroups: /kube-reserved
    runtimeCgroups: /kube-reserved
    # TLS 1.2 minimum with an explicit cipher suite list.
    tlsMinVersion: VersionTLS12
    tlsCipherSuites:
    - TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256
    - TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256
    - TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305
    - TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384
    - TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305
    - TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384
    - TLS_RSA_WITH_AES_256_GCM_SHA384
    - TLS_RSA_WITH_AES_128_GCM_SHA256
  # Kubernetes release and the CIDR allowed to reach the API.
  kubernetesVersion: "1.16.10"
  kubernetesApiAccess:
  - "{{ .API_ACCESS_CIDRS }}"
  # Public and internal API names, both derived from the cluster name.
  masterPublicName: "api.{{ .KOPS_CLUSTER_NAME }}"
  masterInternalName: "api.internal.{{ .KOPS_CLUSTER_NAME }}"
  # Masters use a different cluster DNS address than the one in `kubelet`.
  masterKubelet:
    clusterDNS: "100.64.0.10"
  # Existing VPC the cluster is placed in.
  networkID: "{{ .AWS_VPC_ID }}"
  networkCIDR: "{{ .AWS_VPC_CIDR }}"
  # Calico v3 networking with cross-subnet mode and error-level screen logging.
  networking:
    calico:
      majorVersion: v3
      crossSubnet: true
      logSeverityScreen: error
  # Node authorizer image is supplied through a template placeholder.
  nodeAuthorization:
    nodeAuthorizer:
      image: "{{ .NODE_AUTHORIZER_IMAGE }}"
  nonMasqueradeCIDR: "100.64.0.0/10"
  # CIDR allowed to reach the instances over SSH.
  sshAccess:
  - "{{ .SSH_ACCESS_CIDRS }}"
  # Six pre-existing subnets (referenced by id): one Private and one Utility
  # subnet in each of the zones a, b and c of the configured region.
  subnets:
  - name: "{{ .AWS_REGION }}a"
    type: Private
    zone: "{{ .AWS_REGION }}a"
    id: "{{ .SUBNET_PRIVATE_ID_REGION_A }}"
    cidr: "{{ .SUBNET_PRIVATE_CIDR_REGION_A }}"
  - name: "{{ .AWS_REGION }}b"
    type: Private
    zone: "{{ .AWS_REGION }}b"
    id: "{{ .SUBNET_PRIVATE_ID_REGION_B }}"
    cidr: "{{ .SUBNET_PRIVATE_CIDR_REGION_B }}"
  - name: "{{ .AWS_REGION }}c"
    type: Private
    zone: "{{ .AWS_REGION }}c"
    id: "{{ .SUBNET_PRIVATE_ID_REGION_C }}"
    cidr: "{{ .SUBNET_PRIVATE_CIDR_REGION_C }}"
  - name: "utility-{{ .AWS_REGION }}a"
    type: Utility
    zone: "{{ .AWS_REGION }}a"
    id: "{{ .SUBNET_PUBLIC_ID_REGION_A }}"
    cidr: "{{ .SUBNET_PUBLIC_CIDR_REGION_A }}"
  - name: "utility-{{ .AWS_REGION }}b"
    type: Utility
    zone: "{{ .AWS_REGION }}b"
    id: "{{ .SUBNET_PUBLIC_ID_REGION_B }}"
    cidr: "{{ .SUBNET_PUBLIC_CIDR_REGION_B }}"
  - name: "utility-{{ .AWS_REGION }}c"
    type: Utility
    zone: "{{ .AWS_REGION }}c"
    id: "{{ .SUBNET_PUBLIC_ID_REGION_C }}"
    cidr: "{{ .SUBNET_PUBLIC_CIDR_REGION_C }}"
  # Terraform target: extra provider configuration sets an alias.
  target:
    terraform:
      providerExtraConfig:
        alias: ignoreprovider
  # Private masters and nodes, public DNS, and a bastion host name derived
  # from the cluster name.
  topology:
    masters: private
    nodes: private
    dns:
      type: Public
    bastion:
      bastionPublicName: "bastion.{{ .KOPS_CLUSTER_NAME }}"
  updatePolicy: external