跳转到主要内容

Kubernetes安全加固实战:从集群到工作负载的全方位防护

博主
14 分钟
2955 字
--

AI 导读

本文系统梳理 Kubernetes 安全加固的完整链路:集群层(API Server 参数加固、审计策略、etcd 与静态数据加密)、权限层(RBAC 最小权限设计与动态临时授权)、网络层(NetworkPolicy 默认拒绝与 Calico 高级策略)、工作负载层(Pod 安全标准、OPA Gatekeeper 准入控制、镜像扫描与签名验证),以及运行时监控(Falco)和密钥管理(External Secrets、应用侧加密与轮换),并给出可直接参考的配置与代码示例。

内容由AI智能生成

Kubernetes安全加固实战:从集群到工作负载的全方位防护

Kubernetes作为容器编排的事实标准,其安全性直接影响整个云原生应用的安全态势。本文将深入探讨Kubernetes安全加固的最佳实践,从集群层面到工作负载层面的全方位防护策略。

Kubernetes安全架构

安全层次模型

%% Layered defence model: top chain is the layer hierarchy (infrastructure ->
%% cluster -> node -> network -> workload -> application); side arrows point
%% from a supporting control to the layer it hardens.
graph TB
    A[物理/云基础设施安全] --> B[集群安全]
    B --> C[节点安全]
    C --> D[网络安全]
    D --> E[工作负载安全]
    E --> F[应用安全]

    B1[API Server安全] --> B
    B2[etcd加密] --> B
    B3[证书管理] --> B

    C1[节点加固] --> C
    C2[容器运行时安全] --> C
    C3[镜像安全] --> C

    D1[网络策略] --> D
    D2[服务网格] --> D
    D3[入口控制] --> D

    E1[Pod安全标准] --> E
    E2[RBAC] --> E
    E3[准入控制] --> E

    F1[应用代码安全] --> F
    F2[密钥管理] --> F
    F3[运行时保护] --> F

威胁模型分析

威胁类别 攻击向量 影响范围 防护措施
未授权访问 API Server暴露 整个集群 认证、授权、网络隔离
权限提升 RBAC配置错误 命名空间/集群 最小权限原则、定期审计
容器逃逸 内核漏洞、特权容器 节点 安全上下文、运行时保护
数据泄露 密钥暴露、存储未加密 应用数据 密钥管理、加密存储
供应链攻击 恶意镜像 工作负载 镜像扫描、签名验证
网络攻击 横向移动 多个服务 网络策略、零信任

集群安全配置

API Server安全加固

# kube-apiserver static Pod manifest, hardened for Kubernetes v1.28.
apiVersion: v1
kind: Pod
metadata:
  name: kube-apiserver
  namespace: kube-system
spec:
  containers:
  - name: kube-apiserver
    image: k8s.gcr.io/kube-apiserver:v1.28.0
    command:
    - kube-apiserver
    # Serve only on the TLS port.
    # NOTE: --insecure-port was removed in Kubernetes 1.24; passing it to a
    # v1.28 apiserver is a fatal "unknown flag" error, so it must not appear.
    - --secure-port=6443
    - --bind-address=0.0.0.0

    # Authentication: client CA plus server TLS with a restricted cipher list.
    - --client-ca-file=/etc/kubernetes/pki/ca.crt
    - --tls-cert-file=/etc/kubernetes/pki/apiserver.crt
    - --tls-private-key-file=/etc/kubernetes/pki/apiserver.key
    - --tls-cipher-suites=TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256
    - --tls-min-version=VersionTLS12

    # Authorization and admission control.
    # PodSecurityPolicy was removed in Kubernetes 1.25; the built-in
    # PodSecurity plugin (Pod Security Standards) replaces it.
    - --authorization-mode=Node,RBAC
    - --enable-admission-plugins=NodeRestriction,PodSecurity,ResourceQuota,LimitRanger

    # Audit logging; what gets recorded is driven by the policy file below.
    - --audit-log-path=/var/log/audit.log
    - --audit-log-maxage=30
    - --audit-log-maxbackup=10
    - --audit-log-maxsize=100
    - --audit-policy-file=/etc/kubernetes/audit-policy.yaml

    # etcd access over mutual TLS only.
    - --etcd-cafile=/etc/kubernetes/pki/etcd/ca.crt
    - --etcd-certfile=/etc/kubernetes/pki/apiserver-etcd-client.crt
    - --etcd-keyfile=/etc/kubernetes/pki/apiserver-etcd-client.key
    - --etcd-servers=https://127.0.0.1:2379

    # Miscellaneous hardening.
    # (--repair-malformed-updates was removed from kube-apiserver long ago
    # and would also be rejected by v1.28.)
    - --anonymous-auth=false
    - --enable-bootstrap-token-auth=false
    - --profiling=false
    - --service-account-lookup=true
    - --service-account-key-file=/etc/kubernetes/pki/sa.pub
    - --service-account-signing-key-file=/etc/kubernetes/pki/sa.key
    # Required alongside the signing key on modern Kubernetes (bound tokens).
    - --service-account-issuer=https://kubernetes.default.svc.cluster.local

    volumeMounts:
    - name: ca-certs
      mountPath: /etc/ssl/certs
      readOnly: true
    - name: etc-ca-certificates
      mountPath: /etc/ca-certificates
      readOnly: true
    - name: k8s-certs
      mountPath: /etc/kubernetes/pki
      readOnly: true
    - name: usr-local-share-ca-certificates
      mountPath: /usr/local/share/ca-certificates
      readOnly: true
    - name: usr-share-ca-certificates
      mountPath: /usr/share/ca-certificates
      readOnly: true

审计策略配置

# /etc/kubernetes/audit-policy.yaml
# Consumed by kube-apiserver via --audit-policy-file; rules match top-down.
apiVersion: audit.k8s.io/v1
kind: Policy
rules:
# Drop noisy, low-value health and version probes.
# (The original first rule used empty-string matchers — namespaces: [""],
# verbs: [""], resources: [""] — which only match an empty verb/resource and
# therefore recorded nothing useful.)
- level: None
  nonResourceURLs:
  - "/healthz*"
  - "/livez*"
  - "/readyz*"
  - "/version"

# Record full request/response bodies for Secret and ConfigMap access.
- level: RequestResponse
  resources:
  - group: ""
    resources: ["secrets", "configmaps"]

# Record all RBAC changes with full request/response bodies.
- level: RequestResponse
  resources:
  - group: "rbac.authorization.k8s.io"
    resources: ["roles", "rolebindings", "clusterroles", "clusterrolebindings"]

# Record Pod create/update/patch/delete requests (request body only).
- level: Request
  resources:
  - group: ""
    resources: ["pods"]
  verbs: ["create", "update", "patch", "delete"]

# Record service-account token creation at metadata level (never log tokens).
- level: Metadata
  resources:
  - group: ""
    resources: ["serviceaccounts/token"]

# Record read activity of the garbage-collector controller's service account.
# (The original comment claimed this rule logged admission-controller
# decisions; it does not — it matches this specific user's reads.)
- level: Request
  users: ["system:serviceaccount:kube-system:generic-garbage-collector"]
  verbs: ["get", "list", "watch"]
  resources:
  - group: ""
    resources: ["*"]

# Default rule: metadata for everything else; skip the RequestReceived stage
# to halve log volume.
- level: Metadata
  omitStages:
  - RequestReceived

etcd安全配置

# etcd static Pod manifest with mutual TLS on both client and peer ports.
apiVersion: v1
kind: Pod
metadata:
  name: etcd
  namespace: kube-system
spec:
  containers:
  - name: etcd
    image: k8s.gcr.io/etcd:3.5.9-0
    command:
    - etcd
    # Basic single-member bootstrap settings.
    - --name=master
    - --data-dir=/var/lib/etcd
    - --listen-client-urls=https://127.0.0.1:2379
    - --advertise-client-urls=https://127.0.0.1:2379
    - --listen-peer-urls=https://127.0.0.1:2380
    - --initial-advertise-peer-urls=https://127.0.0.1:2380
    - --initial-cluster=master=https://127.0.0.1:2380
    - --initial-cluster-token=etcd-cluster-1
    - --initial-cluster-state=new

    # TLS: require client certificates on both the client and peer interfaces.
    - --client-cert-auth=true
    - --trusted-ca-file=/etc/kubernetes/pki/etcd/ca.crt
    - --cert-file=/etc/kubernetes/pki/etcd/server.crt
    - --key-file=/etc/kubernetes/pki/etcd/server.key
    - --peer-client-cert-auth=true
    - --peer-trusted-ca-file=/etc/kubernetes/pki/etcd/ca.crt
    - --peer-cert-file=/etc/kubernetes/pki/etcd/peer.crt
    - --peer-key-file=/etc/kubernetes/pki/etcd/peer.key

    # NOTE: encryption-at-rest is NOT configured on etcd. The original
    # manifest passed --experimental-encryption-provider-config to etcd,
    # which etcd does not recognize; that setting belongs on kube-apiserver
    # as --encryption-provider-config=/etc/kubernetes/encryption-config.yaml
    # (the file shown in the next section).

    volumeMounts:
    - name: etcd-data
      mountPath: /var/lib/etcd
    - name: etcd-certs
      mountPath: /etc/kubernetes/pki/etcd
      readOnly: true

数据加密配置

# /etc/kubernetes/encryption-config.yaml
# Consumed by kube-apiserver via --encryption-provider-config.
apiVersion: apiserver.config.k8s.io/v1
kind: EncryptionConfiguration
resources:
# Provider order matters: the first provider (aescbc) encrypts new writes;
# identity is listed last so existing plaintext objects remain readable until
# they are rewritten.
- resources:
  - secrets
  - configmaps
  - events
  providers:
  - aescbc:
      keys:
      - name: key1
        # Replace with a real key: head -c 32 /dev/urandom | base64
        secret: <base64-encoded-32-byte-key>
  - identity: {}
- resources:
  - persistentvolumes
  - persistentvolumeclaims
  providers:
  - aescbc:
      keys:
      - name: key1
        secret: <base64-encoded-32-byte-key>
  - identity: {}

RBAC权限管理

最小权限原则实现

# Dedicated service account; token auto-mount disabled so pods only get API
# credentials when they explicitly opt in.
apiVersion: v1
kind: ServiceAccount
metadata:
  name: app-service-account
  namespace: production
automountServiceAccountToken: false

---
# Namespace-scoped role for the application.
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
  namespace: production
  name: app-role
rules:
# Read-only access to two named ConfigMaps/Secrets only.
- apiGroups: [""]
  resources: ["configmaps", "secrets"]
  verbs: ["get", "list"]
  resourceNames: ["app-config", "app-secrets"]

# Read and patch pods.
# NOTE: an empty resourceNames list is equivalent to omitting the field —
# it does NOT restrict to zero names; all pods in the namespace match.
- apiGroups: [""]
  resources: ["pods"]
  verbs: ["get", "list", "patch"]
  resourceNames: []

# Allow emitting Events.
- apiGroups: [""]
  resources: ["events"]
  verbs: ["create"]

---
# Bind the role to the service account.
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
  name: app-role-binding
  namespace: production
subjects:
- kind: ServiceAccount
  name: app-service-account
  namespace: production
roleRef:
  kind: Role
  name: app-role
  apiGroup: rbac.authorization.k8s.io

---
# Cluster-wide read-only role for monitoring systems.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: monitoring-reader
rules:
- apiGroups: [""]
  resources: ["nodes", "nodes/metrics", "services", "endpoints", "pods"]
  verbs: ["get", "list", "watch"]
# NOTE: the "extensions" group no longer serves Deployments/ReplicaSets on
# modern clusters (removed in 1.16); the entry is inert but harmless — "apps"
# is what actually grants access.
- apiGroups: ["extensions", "apps"]
  resources: ["deployments", "replicasets"]
  verbs: ["get", "list", "watch"]
- apiGroups: ["metrics.k8s.io"]
  resources: ["nodes", "pods"]
  verbs: ["get", "list"]

---
# Developer role (namespace-scoped).
# IMPORTANT: RBAC is purely additive — a later rule cannot subtract from an
# earlier one. The original version granted resources: ["*"] / verbs: ["*"]
# and then tried to "restrict" secrets and namespaces with narrower rules;
# those narrower rules were no-ops because the wildcard already allowed
# everything. To actually keep secrets out of reach, the allowed resources
# must be enumerated explicitly, with secrets deliberately absent.
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
  namespace: development
  name: developer-role
rules:
# Full control of everyday workload resources (note: no "secrets").
- apiGroups: [""]
  resources: ["pods", "pods/log", "pods/exec", "services", "configmaps",
              "persistentvolumeclaims", "events", "endpoints"]
  verbs: ["*"]
- apiGroups: ["apps"]
  resources: ["deployments", "replicasets", "statefulsets", "daemonsets"]
  verbs: ["*"]
# Read access only to an explicit allow-list of secrets.
- apiGroups: [""]
  resources: ["secrets"]
  verbs: ["get", "list"]
  resourceNames: ["allowed-secret-1", "allowed-secret-2"]
# Namespaces stay read-only.
- apiGroups: [""]
  resources: ["namespaces"]
  verbs: ["get", "list"]

---
# Operations role (cluster-scoped).
# IMPORTANT: RBAC is additive, and a ClusterRole cannot be confined to
# particular namespaces from inside its own rules. The original version
# granted resources: ["*"] / verbs: ["*"] — effectively cluster-admin — and
# then added a "restricted to kube-system/..." rule that had no restricting
# effect. Grant only what operators need; if namespace scoping is required,
# bind a Role per namespace via RoleBindings instead.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: ops-role
rules:
# Node management.
- apiGroups: [""]
  resources: ["nodes"]
  verbs: ["get", "list", "watch", "patch", "update"]
# Read-only visibility of the named system namespaces.
- apiGroups: [""]
  resources: ["namespaces"]
  verbs: ["get", "list", "watch"]
  resourceNames: ["kube-system", "kube-public", "monitoring"]
# Troubleshooting access to core workload objects (read-only).
- apiGroups: [""]
  resources: ["pods", "pods/log", "events", "services", "endpoints"]
  verbs: ["get", "list", "watch"]

动态RBAC管理

// rbac-manager.go
package main

import (
    "context"
    "fmt"
    "time"

    rbacv1 "k8s.io/api/rbac/v1"
    metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    "k8s.io/client-go/kubernetes"
    "k8s.io/client-go/rest"
)

// RBACManager provides helpers for time-limited RBAC grants: creation,
// expiry-based cleanup, and a cluster-wide permissions audit.
type RBACManager struct {
    clientset *kubernetes.Clientset // client for the cluster's core and RBAC APIs
}

// NewRBACManager builds a manager from in-cluster configuration, so it must
// run inside a pod whose service account is allowed to manage RBAC objects.
func NewRBACManager() (*RBACManager, error) {
    config, err := rest.InClusterConfig()
    if err != nil {
        return nil, err
    }

    clientset, err := kubernetes.NewForConfig(config)
    if err != nil {
        return nil, err
    }

    return &RBACManager{clientset: clientset}, nil
}

// CreateTemporaryAccess grants username read-only access (get/list/watch on
// pods, services and configmaps) in namespace for the given duration.
// The expiry is recorded in the "rbac.security.io/expires-at" annotation;
// CleanupExpiredPermissions later deletes objects whose deadline has passed.
// If the RoleBinding creation fails the Role is left behind, but it carries
// the same annotation and is removed by the next cleanup sweep.
func (r *RBACManager) CreateTemporaryAccess(username, namespace string, duration time.Duration) error {
    // Compute the expiry once so Role and RoleBinding carry identical stamps.
    expiry := time.Now().Add(duration).Format(time.RFC3339)

    // Role describing the temporary, read-only permissions.
    role := &rbacv1.Role{
        ObjectMeta: metav1.ObjectMeta{
            Name:      fmt.Sprintf("temp-access-%s", username),
            Namespace: namespace,
            Annotations: map[string]string{
                "rbac.security.io/expires-at": expiry,
                "rbac.security.io/created-by": "rbac-manager",
            },
        },
        Rules: []rbacv1.PolicyRule{
            {
                APIGroups: []string{""},
                Resources: []string{"pods", "services", "configmaps"},
                Verbs:     []string{"get", "list", "watch"},
            },
        },
    }

    if _, err := r.clientset.RbacV1().Roles(namespace).Create(context.TODO(), role, metav1.CreateOptions{}); err != nil {
        return err
    }

    // Bind the role to the user.
    // FIX: subjects of kind "User" must set APIGroup to
    // "rbac.authorization.k8s.io" — with the empty default the API server
    // rejects the binding as invalid.
    roleBinding := &rbacv1.RoleBinding{
        ObjectMeta: metav1.ObjectMeta{
            Name:      fmt.Sprintf("temp-binding-%s", username),
            Namespace: namespace,
            Annotations: map[string]string{
                "rbac.security.io/expires-at": expiry,
                "rbac.security.io/created-by": "rbac-manager",
            },
        },
        Subjects: []rbacv1.Subject{
            {
                Kind:     "User",
                APIGroup: "rbac.authorization.k8s.io",
                Name:     username,
            },
        },
        RoleRef: rbacv1.RoleRef{
            Kind:     "Role",
            Name:     role.Name,
            APIGroup: "rbac.authorization.k8s.io",
        },
    }

    _, err := r.clientset.RbacV1().RoleBindings(namespace).Create(context.TODO(), roleBinding, metav1.CreateOptions{})
    return err
}

// CleanupExpiredPermissions sweeps every namespace and deletes Roles and
// RoleBindings whose "rbac.security.io/expires-at" annotation (written by
// CreateTemporaryAccess) lies in the past.
//
// Behaviour notes:
//   - A namespace whose Role/RoleBinding listing fails is skipped (best-effort).
//   - Delete errors are ignored; since the annotation remains on the object,
//     a failed delete is naturally retried on the next sweep.
//   - Annotations with unparseable timestamps are left untouched.
func (r *RBACManager) CleanupExpiredPermissions() error {
    namespaces, err := r.clientset.CoreV1().Namespaces().List(context.TODO(), metav1.ListOptions{})
    if err != nil {
        return err
    }

    now := time.Now()

    for _, ns := range namespaces.Items {
        // Sweep expired Roles in this namespace.
        roles, err := r.clientset.RbacV1().Roles(ns.Name).List(context.TODO(), metav1.ListOptions{})
        if err != nil {
            continue
        }

        for _, role := range roles.Items {
            if expiresAt, exists := role.Annotations["rbac.security.io/expires-at"]; exists {
                expireTime, err := time.Parse(time.RFC3339, expiresAt)
                if err == nil && now.After(expireTime) {
                    r.clientset.RbacV1().Roles(ns.Name).Delete(context.TODO(), role.Name, metav1.DeleteOptions{})
                }
            }
        }

        // Sweep expired RoleBindings in this namespace.
        roleBindings, err := r.clientset.RbacV1().RoleBindings(ns.Name).List(context.TODO(), metav1.ListOptions{})
        if err != nil {
            continue
        }

        for _, binding := range roleBindings.Items {
            if expiresAt, exists := binding.Annotations["rbac.security.io/expires-at"]; exists {
                expireTime, err := time.Parse(time.RFC3339, expiresAt)
                if err == nil && now.After(expireTime) {
                    r.clientset.RbacV1().RoleBindings(ns.Name).Delete(context.TODO(), binding.Name, metav1.DeleteOptions{})
                }
            }
        }
    }

    return nil
}

// AuditPermissions builds a map from subject ("Kind/Name") to the list of
// roles bound to it, covering ClusterRoleBindings plus the RoleBindings of
// every namespace.
//
// NOTE(review): if listing namespaces fails, the cluster-level results are
// returned with a nil error — callers cannot distinguish a partial audit
// from a complete one. Confirm this best-effort behaviour is intended.
func (r *RBACManager) AuditPermissions() (map[string][]string, error) {
    audit := make(map[string][]string)

    // Cluster-scoped bindings first.
    clusterBindings, err := r.clientset.RbacV1().ClusterRoleBindings().List(context.TODO(), metav1.ListOptions{})
    if err != nil {
        return nil, err
    }

    for _, binding := range clusterBindings.Items {
        for _, subject := range binding.Subjects {
            key := fmt.Sprintf("%s/%s", subject.Kind, subject.Name)
            audit[key] = append(audit[key], fmt.Sprintf("ClusterRole: %s", binding.RoleRef.Name))
        }
    }

    // Then the RoleBindings of each namespace.
    namespaces, err := r.clientset.CoreV1().Namespaces().List(context.TODO(), metav1.ListOptions{})
    if err != nil {
        return audit, nil
    }

    for _, ns := range namespaces.Items {
        bindings, err := r.clientset.RbacV1().RoleBindings(ns.Name).List(context.TODO(), metav1.ListOptions{})
        if err != nil {
            continue
        }

        for _, binding := range bindings.Items {
            for _, subject := range binding.Subjects {
                key := fmt.Sprintf("%s/%s", subject.Kind, subject.Name)
                audit[key] = append(audit[key], fmt.Sprintf("Role: %s (namespace: %s)", binding.RoleRef.Name, ns.Name))
            }
        }
    }

    return audit, nil
}

网络安全策略

网络策略配置

# Default-deny all ingress: an empty podSelector matches every pod in the
# namespace, and listing Ingress with no rules denies all inbound traffic.
apiVersion: networking.k8s.io/v1
kind: NetworkPolicy
metadata:
  name: default-deny-ingress
  namespace: production
spec:
  podSelector: {}
  policyTypes:
  - Ingress

---
# Default-deny all egress for the namespace (same pattern, outbound).
apiVersion: networking.k8s.io/v1
kind: NetworkPolicy
metadata:
  name: default-deny-egress
  namespace: production
spec:
  podSelector: {}
  policyTypes:
  - Egress

---
# Allow frontend pods to reach backend pods on TCP 8080.
apiVersion: networking.k8s.io/v1
kind: NetworkPolicy
metadata:
  name: frontend-to-backend
  namespace: production
spec:
  podSelector:
    matchLabels:
      app: backend
  policyTypes:
  - Ingress
  ingress:
  - from:
    - podSelector:
        matchLabels:
          app: frontend
    ports:
    - protocol: TCP
      port: 8080

---
# Allow backend pods to reach the database, plus DNS resolution.
apiVersion: networking.k8s.io/v1
kind: NetworkPolicy
metadata:
  name: backend-to-database
  namespace: production
spec:
  podSelector:
    matchLabels:
      app: backend
  policyTypes:
  - Egress
  egress:
  - to:
    - podSelector:
        matchLabels:
          app: database
    ports:
    - protocol: TCP
      port: 5432
  # DNS: allow both UDP and TCP on port 53 — resolvers fall back to TCP for
  # truncated responses, so a UDP-only rule causes intermittent lookup
  # failures. Omitting "to" entirely matches all destinations (clearer than
  # the original "to: []").
  - ports:
    - protocol: UDP
      port: 53
    - protocol: TCP
      port: 53

---
# Allow the monitoring namespace to scrape every pod on ports 8080 and 9090.
apiVersion: networking.k8s.io/v1
kind: NetworkPolicy
metadata:
  name: monitoring-access
  namespace: production
spec:
  podSelector: {}
  policyTypes:
  - Ingress
  ingress:
  - from:
    - namespaceSelector:
        matchLabels:
          name: monitoring
    ports:
    - protocol: TCP
      port: 8080
    - protocol: TCP
      port: 9090

---
# Cross-namespace access control: within a single "from" entry, a
# namespaceSelector combined with a podSelector is ANDed — only pods labelled
# app=test-client in namespaces labelled environment=staging match.
apiVersion: networking.k8s.io/v1
kind: NetworkPolicy
metadata:
  name: cross-namespace-api
  namespace: production
spec:
  podSelector:
    matchLabels:
      app: api-gateway
  policyTypes:
  - Ingress
  ingress:
  - from:
    - namespaceSelector:
        matchLabels:
          environment: staging
      podSelector:
        matchLabels:
          app: test-client
    ports:
    - protocol: TCP
      port: 443

Calico高级网络策略

# Calico GlobalNetworkPolicy: cluster-wide baseline controls.
apiVersion: projectcalico.org/v3
kind: GlobalNetworkPolicy
metadata:
  name: security-controls
spec:
  # Applies to every endpoint that belongs to a namespace.
  namespaceSelector: has(projectcalico.org/name)

  # Selected endpoints become default-deny in both directions unless a rule
  # below allows the traffic.
  types:
  - Ingress
  - Egress

  # Ingress rules.
  ingress:
  # NOTE(review): the original comment claimed this allows "same-namespace
  # traffic", but "projectcalico.org/name == global()" selects non-namespaced
  # (global) endpoints, not the local namespace — confirm the intended
  # selector before relying on this rule.
  - action: Allow
    source:
      namespaceSelector: projectcalico.org/name == global()

  # Allow traffic from the ingress controller to web ports.
  - action: Allow
    source:
      namespaceSelector: projectcalico.org/name == "ingress-nginx"
    destination:
      ports:
      - 80
      - 443

  # Egress rules.
  # Allow DNS queries.
  - action: Allow
    protocol: UDP
    destination:
      ports:
      - 53

  # Allow outbound HTTPS (e.g. image pulls, external APIs).
  - action: Allow
    protocol: TCP
    destination:
      ports:
      - 443

  # Allow access to the Kubernetes API.
  # NOTE(review): matching on destination.services requires a reasonably
  # recent Calico release — verify against the deployed version.
  - action: Allow
    protocol: TCP
    destination:
      services:
        name: kubernetes
        namespace: default

---
# Time-based access control.
# FIX: Calico ingress rules have no per-rule "metadata"/"annotations" field —
# the original manifest embedded one inside the rule, which is a schema error.
# The schedule hint now lives as a policy-level annotation, to be acted on by
# an external controller that enables/disables the policy on that cron.
apiVersion: projectcalico.org/v3
kind: NetworkPolicy
metadata:
  name: business-hours-only
  namespace: production
  annotations:
    schedule: "0 9-17 * * 1-5"  # Mon-Fri, 09:00-17:00
spec:
  selector: app == "admin-panel"
  types:
  - Ingress
  ingress:
  - action: Allow
    source:
      selector: role == "admin"

---
# IP-range restriction. Rules evaluate in order: explicit Deny first, then
# Allow for corporate ranges; any other source falls through to the policy's
# default deny for selected endpoints.
apiVersion: projectcalico.org/v3
kind: GlobalNetworkPolicy
metadata:
  name: geo-restriction
spec:
  selector: app == "sensitive-app"
  types:
  - Ingress
  ingress:
  - action: Deny
    source:
      nets:
      # Blocked ranges (examples use RFC 5737 documentation prefixes).
      - 192.0.2.0/24
      - 203.0.113.0/24
  - action: Allow
    source:
      nets:
      # Corporate/private ranges that are permitted.
      - 10.0.0.0/8
      - 172.16.0.0/12

Pod安全标准

Pod Security Standards配置

# Namespace-level Pod Security Standards via the PodSecurity admission labels.
apiVersion: v1
kind: Namespace
metadata:
  name: production
  labels:
    # Reject pods that violate the "restricted" profile.
    pod-security.kubernetes.io/enforce: restricted
    pod-security.kubernetes.io/enforce-version: v1.28

    # Additionally warn on anything below "baseline".
    pod-security.kubernetes.io/warn: baseline
    pod-security.kubernetes.io/warn-version: v1.28

    # Audit-log violations of the "privileged" profile.
    pod-security.kubernetes.io/audit: privileged
    pod-security.kubernetes.io/audit-version: v1.28

---
# Example pod that satisfies the "restricted" Pod Security Standard.
apiVersion: v1
kind: Pod
metadata:
  name: secure-app
  namespace: production
spec:
  # Dedicated, non-privileged service account; no API token mounted.
  serviceAccountName: app-service-account
  automountServiceAccountToken: false

  # Pod-level security context.
  securityContext:
    # Run as an unprivileged user/group.
    runAsNonRoot: true
    runAsUser: 1000
    runAsGroup: 1000
    fsGroup: 1000

    # Default seccomp profile from the container runtime.
    seccompProfile:
      type: RuntimeDefault

    # SELinux MCS level for the pod.
    seLinuxOptions:
      level: "s0:c123,c456"

  containers:
  - name: app
    image: myapp:v1.2.3

    # Container-level security context.
    securityContext:
      # No privileged mode.
      privileged: false

      # Block setuid-style privilege escalation.
      allowPrivilegeEscalation: false

      # Root filesystem is read-only; writable paths are explicit volumes.
      readOnlyRootFilesystem: true

      # Drop every capability...
      capabilities:
        drop:
        - ALL
        # ...and add back only what the app needs (bind ports < 1024).
        add:
        - NET_BIND_SERVICE

    # Resource requests/limits keep the pod schedulable and bounded.
    resources:
      requests:
        memory: "64Mi"
        cpu: "250m"
      limits:
        memory: "128Mi"
        cpu: "500m"

    # Liveness and readiness probes.
    livenessProbe:
      httpGet:
        path: /health
        port: 8080
        scheme: HTTP
      initialDelaySeconds: 30
      periodSeconds: 10
      timeoutSeconds: 5
      failureThreshold: 3

    readinessProbe:
      httpGet:
        path: /ready
        port: 8080
        scheme: HTTP
      initialDelaySeconds: 5
      periodSeconds: 5
      timeoutSeconds: 3
      failureThreshold: 3

    # Writable scratch space, required because the rootfs is read-only.
    volumeMounts:
    - name: tmp-volume
      mountPath: /tmp
    - name: cache-volume
      mountPath: /app/cache

  volumes:
  - name: tmp-volume
    emptyDir: {}
  - name: cache-volume
    emptyDir: {}

准入控制器配置

# OPA Gatekeeper constraint template: require a hardened securityContext.
apiVersion: templates.gatekeeper.sh/v1beta1
kind: ConstraintTemplate
metadata:
  name: k8srequiredsecuritycontext
spec:
  crd:
    spec:
      names:
        kind: K8sRequiredSecurityContext
      validation:
        # FIX: the parameter schema must be nested under openAPIV3Schema —
        # Gatekeeper ignores a bare type/properties mapping at this level,
        # silently accepting any parameters.
        openAPIV3Schema:
          type: object
          properties:
            runAsNonRoot:
              type: boolean
            runAsUser:
              type: integer
            fsGroup:
              type: integer
  targets:
    - target: admission.k8s.gatekeeper.sh
      rego: |
        package k8srequiredsecuritycontext

        violation[{"msg": msg}] {
          container := input.review.object.spec.containers[_]
          not container.securityContext.runAsNonRoot
          msg := "Container must run as non-root user"
        }

        violation[{"msg": msg}] {
          not input.review.object.spec.securityContext.runAsUser
          msg := "Must specify runAsUser in securityContext"
        }

        violation[{"msg": msg}] {
          input.review.object.spec.securityContext.runAsUser == 0
          msg := "Container must not run as root (runAsUser: 0)"
        }

        violation[{"msg": msg}] {
          container := input.review.object.spec.containers[_]
          container.securityContext.privileged
          msg := "Privileged containers are not allowed"
        }

---
# Apply the template above to Pods in production and staging.
# NOTE(review): the template's rego only checks runAsNonRoot/runAsUser/
# privileged — the runAsUser/fsGroup parameter values here are not consulted
# by any rule; confirm whether parameterised checks were intended.
apiVersion: constraints.gatekeeper.sh/v1beta1
kind: K8sRequiredSecurityContext
metadata:
  name: must-have-security-context
spec:
  match:
    kinds:
      - apiGroups: [""]
        kinds: ["Pod"]
    namespaces: ["production", "staging"]
  parameters:
    runAsNonRoot: true
    runAsUser: 1000
    fsGroup: 1000

---
# Image-provenance template: only allow images from trusted registries.
apiVersion: templates.gatekeeper.sh/v1beta1
kind: ConstraintTemplate
metadata:
  name: k8sallowedrepos
spec:
  crd:
    spec:
      names:
        kind: K8sAllowedRepos
      validation:
        # FIX: same as above — the parameter schema belongs under
        # openAPIV3Schema, not directly under "validation".
        openAPIV3Schema:
          type: object
          properties:
            repos:
              type: array
              items:
                type: string
  targets:
    - target: admission.k8s.gatekeeper.sh
      rego: |
        package k8sallowedrepos

        violation[{"msg": msg}] {
          container := input.review.object.spec.containers[_]
          satisfied := [good | repo = input.parameters.repos[_] ; good = startswith(container.image, repo)]
          not any(satisfied)
          msg := sprintf("Container image <%v> comes from untrusted registry", [container.image])
        }

---
# Enforce the registry allow-list on all Pods. Prefixes end with "/" so that
# e.g. "gcr.io/my-company-evil" cannot sneak past a prefix match.
# NOTE(review): "docker.io/library/" whitelists every official Docker Hub
# image — confirm that breadth is acceptable.
apiVersion: constraints.gatekeeper.sh/v1beta1
kind: K8sAllowedRepos
metadata:
  name: must-come-from-trusted-registry
spec:
  match:
    kinds:
      - apiGroups: [""]
        kinds: ["Pod"]
  parameters:
    repos:
      - "gcr.io/my-company/"
      - "registry.company.com/"
      - "docker.io/library/"

容器镜像安全

镜像扫描和签名

# Trivy scanner configuration, mounted into the scan Job below.
# NOTE(review): "security-checks" has been renamed to "scanners" in newer
# Trivy releases — verify the key against the Trivy version actually pinned.
apiVersion: v1
kind: ConfigMap
metadata:
  name: trivy-config
  namespace: security
data:
  trivy.yaml: |
    # 扫描配置
    scan:
      security-checks: vuln,config,secret
      severity: CRITICAL,HIGH,MEDIUM
      ignore-unfixed: false
      
    # 输出配置
    format: json
    output: /tmp/trivy-report.json
    
    # 数据库配置
    cache-dir: /tmp/trivy-cache
    
    # 忽略文件
    ignorefile: .trivyignore

---
# One-shot image scan Job. kubelet expands $(IMAGE_NAME) from the env var;
# --exit-code 1 makes the Job fail when findings match the configured
# severities, which is how CI detects a vulnerable image.
# NOTE(review): pin the trivy image to a version instead of "latest" for
# reproducible scans.
apiVersion: batch/v1
kind: Job
metadata:
  name: image-scan
  namespace: security
spec:
  template:
    spec:
      restartPolicy: Never
      containers:
      - name: trivy
        image: aquasec/trivy:latest
        command:
        - trivy
        - image
        - --config
        - /config/trivy.yaml
        - --exit-code
        - "1"  # non-zero exit when vulnerabilities are found
        - $(IMAGE_NAME)
        env:
        - name: IMAGE_NAME
          value: "myapp:latest"
        volumeMounts:
        - name: config
          mountPath: /config
        - name: cache
          mountPath: /tmp/trivy-cache
      volumes:
      - name: config
        configMap:
          name: trivy-config
      - name: cache
        emptyDir: {}

---
# Cosign signature-verification policy (trusted keyless identity or static key).
# NOTE(review): ClusterImagePolicy is served by the sigstore policy-controller
# and its apiVersion is normally "policy.sigstore.dev/v1beta1" (or v1alpha1
# with the group prefix) — verify the bare "v1alpha1" here against the
# deployed controller.
apiVersion: v1
kind: ConfigMap
metadata:
  name: cosign-policy
  namespace: security
data:
  policy.yaml: |
    apiVersion: v1alpha1
    kind: ClusterImagePolicy
    metadata:
      name: signed-images-policy
    spec:
      images:
      - glob: "gcr.io/my-company/*"
      authorities:
      - keyless:
          url: https://fulcio.sigstore.dev
          identities:
          - issuer: https://accounts.google.com
            subject: build@company.com
      - key:
          data: |
            -----BEGIN PUBLIC KEY-----
            MFkwEwYHKoZIzj0CAQYIKoZIzj0DAQcDQgAE...
            -----END PUBLIC KEY-----
---
# Admission webhook deployment that enforces the signature policy.
# NOTE(review): "cosign webhook" is not a standard cosign subcommand in
# current releases — signature enforcement is usually done by the sigstore
# policy-controller; confirm the image/command against the tooling in use.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: image-policy-webhook
  namespace: security
spec:
  replicas: 2
  selector:
    matchLabels:
      app: image-policy-webhook
  template:
    metadata:
      labels:
        app: image-policy-webhook
    spec:
      containers:
      - name: webhook
        image: sigstore/cosign:latest
        command:
        - cosign
        - webhook
        - --port=8443
        - --cert=/certs/tls.crt
        - --key=/certs/tls.key
        ports:
        - containerPort: 8443
        volumeMounts:
        - name: certs
          mountPath: /certs
          readOnly: true
        - name: policy
          mountPath: /policy
          readOnly: true
      volumes:
      - name: certs
        secret:
          secretName: webhook-certs
      - name: policy
        configMap:
          name: cosign-policy

运行时安全监控

# Falco runtime-security configuration plus custom detection rules.
# The embedded falco.yaml selects rule files, enables JSON output to stdout,
# a log file and an HTTP endpoint (falco-exporter), and configures behaviour
# when syscall events are dropped. falco_rules.local.yaml adds three custom
# rules: crypto-mining, reverse-shell and privilege-escalation detection.
apiVersion: v1
kind: ConfigMap
metadata:
  name: falco-config
  namespace: security
data:
  falco.yaml: |
    # 规则文件
    rules_file:
      - /etc/falco/falco_rules.yaml
      - /etc/falco/falco_rules.local.yaml
      - /etc/falco/k8s_audit_rules.yaml
    
    # 输出配置
    json_output: true
    json_include_output_property: true
    
    # 日志级别
    log_level: info
    
    # 输出通道
    stdout_output:
      enabled: true
    
    syslog_output:
      enabled: false
    
    file_output:
      enabled: true
      keep_alive: false
      filename: /var/log/falco.log
    
    http_output:
      enabled: true
      url: "http://falco-exporter:9376/events"
    
    # 系统调用监控
    syscall_event_drops:
      actions:
        - log
        - alert
      rate: 0.03333
      max_burst: 1000
    
    # 规则匹配
    priority: debug
    
  falco_rules.local.yaml: |
    # 自定义规则
    - rule: Detect crypto mining
      desc: Detect cryptocurrency mining
      condition: >
        spawned_process and
        (proc.name in (cryptonight, xmrig, minergate) or
         proc.cmdline contains "stratum+tcp" or
         proc.cmdline contains "pool.minergate.com")
      output: >
        Cryptocurrency mining detected (user=%user.name command=%proc.cmdline
        container=%container.name image=%container.image.repository)
      priority: CRITICAL
      tags: [cryptocurrency, mining]
    
    - rule: Detect reverse shell
      desc: Detect reverse shell attempts
      condition: >
        spawned_process and
        (proc.name in (nc, ncat, netcat, socat) and
         (proc.cmdline contains "-e" or proc.cmdline contains "-c"))
      output: >
        Reverse shell detected (user=%user.name command=%proc.cmdline
        container=%container.name image=%container.image.repository)
      priority: CRITICAL
      tags: [shell, reverse_shell]
    
    - rule: Detect privilege escalation
      desc: Detect attempts to escalate privileges
      condition: >
        spawned_process and
        (proc.name in (sudo, su, doas) or
         proc.cmdline contains "chmod +s" or
         proc.cmdline contains "setuid")
      output: >
        Privilege escalation attempt (user=%user.name command=%proc.cmdline
        container=%container.name image=%container.image.repository)
      priority: HIGH
      tags: [privilege_escalation]

---
# Falco DaemonSet: one agent per node. privileged + hostPID + hostNetwork and
# the read-only host mounts below are what let Falco observe host syscalls
# and resolve process/container metadata; the containerd socket provides
# container context for events.
apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: falco
  namespace: security
spec:
  selector:
    matchLabels:
      app: falco
  template:
    metadata:
      labels:
        app: falco
    spec:
      serviceAccountName: falco
      hostNetwork: true
      hostPID: true
      containers:
      - name: falco
        image: falcosecurity/falco:latest
        securityContext:
          privileged: true
        args:
        - /usr/bin/falco
        - --cri=/run/containerd/containerd.sock
        - --k8s-api=https://kubernetes.default.svc.cluster.local
        - --k8s-api-cert=/var/run/secrets/kubernetes.io/serviceaccount/ca.crt
        - --k8s-api-token=/var/run/secrets/kubernetes.io/serviceaccount/token
        volumeMounts:
        - name: dev-fs
          mountPath: /host/dev
          readOnly: true
        - name: proc-fs
          mountPath: /host/proc
          readOnly: true
        - name: boot-fs
          mountPath: /host/boot
          readOnly: true
        - name: lib-modules
          mountPath: /host/lib/modules
          readOnly: true
        - name: usr-fs
          mountPath: /host/usr
          readOnly: true
        - name: etc-fs
          mountPath: /host/etc
          readOnly: true
        - name: config
          mountPath: /etc/falco
        - name: containerd-sock
          mountPath: /run/containerd/containerd.sock
      volumes:
      - name: dev-fs
        hostPath:
          path: /dev
      - name: proc-fs
        hostPath:
          path: /proc
      - name: boot-fs
        hostPath:
          path: /boot
      - name: lib-modules
        hostPath:
          path: /lib/modules
      - name: usr-fs
        hostPath:
          path: /usr
      - name: etc-fs
        hostPath:
          path: /etc
      - name: config
        configMap:
          name: falco-config
      - name: containerd-sock
        hostPath:
          path: /run/containerd/containerd.sock

密钥管理

External Secrets Operator

# External Secrets Operator: SecretStore backed by HashiCorp Vault (KV v2),
# authenticating via the Kubernetes auth method with a dedicated SA.
apiVersion: external-secrets.io/v1beta1
kind: SecretStore
metadata:
  name: vault-backend
  namespace: production
spec:
  provider:
    vault:
      server: "https://vault.company.com"
      path: "secret"
      version: "v2"
      auth:
        kubernetes:
          mountPath: "kubernetes"
          role: "external-secrets"
          serviceAccountRef:
            name: external-secrets-sa

---
# Sync database credentials from Vault into a native Secret; the template
# assembles a ready-to-use connection URL from the four fetched properties.
# NOTE(review): refreshInterval of 15s polls Vault four times a minute per
# ExternalSecret — confirm that rate is intended for production.
apiVersion: external-secrets.io/v1beta1
kind: ExternalSecret
metadata:
  name: app-secrets
  namespace: production
spec:
  refreshInterval: 15s
  secretStoreRef:
    name: vault-backend
    kind: SecretStore
  target:
    name: app-secrets
    creationPolicy: Owner
    template:
      type: Opaque
      data:
        database-url: "postgresql://{{ .username }}:{{ .password }}@{{ .host }}:5432/{{ .database }}"
  data:
  - secretKey: username
    remoteRef:
      key: database/production
      property: username
  - secretKey: password
    remoteRef:
      key: database/production
      property: password
  - secretKey: host
    remoteRef:
      key: database/production
      property: host
  - secretKey: database
    remoteRef:
      key: database/production
      property: database
---
# Key-rotation pattern: re-fetch hourly and always track the latest version
# in Vault, so a rotation upstream propagates within one refresh interval.
apiVersion: external-secrets.io/v1beta1
kind: ExternalSecret
metadata:
  name: rotating-api-key
  namespace: production
spec:
  refreshInterval: 1h  # check Vault once per hour
  secretStoreRef:
    name: vault-backend
    kind: SecretStore
  target:
    name: api-key
    creationPolicy: Owner
  data:
  - secretKey: api-key
    remoteRef:
      key: api/production
      property: key
      version: latest  # always resolve the newest version

密钥加密和访问控制

// secret-manager.go
package main

import (
    "context"
    "crypto/aes"
    "crypto/cipher"
    "crypto/rand"
    "encoding/base64"
    "fmt"
    "io"
    "time"

    corev1 "k8s.io/api/core/v1"
    metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    "k8s.io/client-go/kubernetes"
)

// SecretManager stores and reads Kubernetes Secrets whose values are
// additionally encrypted application-side with AES-GCM — defence in depth on
// top of any etcd-level encryption at rest.
type SecretManager struct {
    clientset *kubernetes.Clientset
    gcm       cipher.AEAD // AES-GCM instance derived from the supplied key
}

// NewSecretManager wires a clientset with an AES-GCM cipher built from key.
// key must be a valid AES key length (16, 24 or 32 bytes — 32 for AES-256);
// aes.NewCipher returns an error for anything else.
func NewSecretManager(clientset *kubernetes.Clientset, key []byte) (*SecretManager, error) {
    block, err := aes.NewCipher(key)
    if err != nil {
        return nil, err
    }

    gcm, err := cipher.NewGCM(block)
    if err != nil {
        return nil, err
    }

    return &SecretManager{
        clientset: clientset,
        gcm:       gcm,
    }, nil
}

// encrypt seals data with AES-GCM and returns base64(nonce || ciphertext).
// A fresh random nonce is generated per call, so encrypting the same
// plaintext twice yields different ciphertexts.
func (sm *SecretManager) encrypt(data []byte) (string, error) {
    nonceLen := sm.gcm.NonceSize()
    nonce := make([]byte, nonceLen)
    _, err := io.ReadFull(rand.Reader, nonce)
    if err != nil {
        return "", err
    }

    // Seal appends the ciphertext to the nonce, producing nonce||ciphertext.
    sealed := sm.gcm.Seal(nonce, nonce, data, nil)
    encoded := base64.StdEncoding.EncodeToString(sealed)
    return encoded, nil
}

// decrypt reverses encrypt: it base64-decodes the payload, splits off the
// nonce prefix, then authenticates and decrypts the rest with AES-GCM.
func (sm *SecretManager) decrypt(encryptedData string) ([]byte, error) {
    raw, err := base64.StdEncoding.DecodeString(encryptedData)
    if err != nil {
        return nil, err
    }

    ns := sm.gcm.NonceSize()
    if len(raw) < ns {
        // Payload cannot even contain a full nonce.
        return nil, fmt.Errorf("ciphertext too short")
    }

    return sm.gcm.Open(nil, raw[:ns], raw[ns:], nil)
}

// CreateEncryptedSecret encrypts every value in data with AES-GCM and stores
// the result as an Opaque Secret. The annotations mark the payload as
// application-encrypted so GetDecryptedSecret knows to decrypt on read.
func (sm *SecretManager) CreateEncryptedSecret(namespace, name string, data map[string][]byte) error {
    encryptedData := make(map[string][]byte)
    
    for key, value := range data {
        encrypted, err := sm.encrypt(value)
        if err != nil {
            return err
        }
        // encrypt returns base64 text; store its bytes as the Secret value.
        encryptedData[key] = []byte(encrypted)
    }

    secret := &corev1.Secret{
        ObjectMeta: metav1.ObjectMeta{
            Name:      name,
            Namespace: namespace,
            Annotations: map[string]string{
                "security.company.com/encrypted": "true",
                "security.company.com/algorithm": "AES-256-GCM",
            },
        },
        Type: corev1.SecretTypeOpaque,
        Data: encryptedData,
    }

    _, err := sm.clientset.CoreV1().Secrets(namespace).Create(context.TODO(), secret, metav1.CreateOptions{})
    return err
}

// GetDecryptedSecret fetches a Secret and, when it carries the
// "security.company.com/encrypted: true" annotation, decrypts each value;
// Secrets without the annotation are returned with their raw data unchanged.
func (sm *SecretManager) GetDecryptedSecret(namespace, name string) (map[string][]byte, error) {
    secret, err := sm.clientset.CoreV1().Secrets(namespace).Get(context.TODO(), name, metav1.GetOptions{})
    if err != nil {
        return nil, err
    }

    // Pass plain Secrets through untouched.
    if secret.Annotations["security.company.com/encrypted"] != "true" {
        return secret.Data, nil
    }

    decryptedData := make(map[string][]byte)
    for key, encryptedValue := range secret.Data {
        // Values are stored as base64 text produced by encrypt.
        decrypted, err := sm.decrypt(string(encryptedValue))
        if err != nil {
            return nil, err
        }
        decryptedData[key] = decrypted
    }

    return decryptedData, nil
}

// RotateSecret writes newData as a brand-new versioned Secret named
// "<name>-v<unix-timestamp>" and records that name on the original Secret in
// the "security.company.com/current-version" label. The old data is not
// deleted, so consumers can migrate to the new version before cleanup.
func (sm *SecretManager) RotateSecret(namespace, name string, newData map[string][]byte) error {
    // Create the new, timestamp-versioned Secret first.
    newName := fmt.Sprintf("%s-v%d", name, time.Now().Unix())
    err := sm.CreateEncryptedSecret(namespace, newName, newData)
    if err != nil {
        return err
    }

    // Then point the original Secret's label at the new version.
    secret, err := sm.clientset.CoreV1().Secrets(namespace).Get(context.TODO(), name, metav1.GetOptions{})
    if err != nil {
        return err
    }

    if secret.Labels == nil {
        secret.Labels = make(map[string]string)
    }
    secret.Labels["security.company.com/current-version"] = newName

    _, err = sm.clientset.CoreV1().Secrets(namespace).Update(context.TODO(), secret, metav1.UpdateOptions{})
    return err
}

通过本文的全面安全加固策略,您可以构建一个多层次、深度防御的Kubernetes安全体系,从集群基础设施到应用工作负载的全方位保护,确保云原生环境的安全性和合规性。


Kubernetes安全是一个持续的过程,需要结合技术手段、流程规范和人员培训,建立完整的安全运营体系。

分享文章