Container Orchestration and Management in Practice: Enterprise-Grade Docker-to-Kubernetes
Container technology has become the core of the modern application deployment and management stack. Starting from Docker fundamentals, this article works through the design, deployment, and operation of Kubernetes clusters, providing a complete hands-on guide to enterprise containerization.
Container Technology Architecture Overview
Overall Technology Stack
graph TB
    subgraph "Development Layer"
        A1[Application code] --> A2[Dockerfile]
        A2 --> A3[Container image]
    end
    subgraph "Image Management Layer"
        A3 --> B1[Harbor Registry]
        B1 --> B2[Image scanning]
        B1 --> B3[Image signing]
    end
    subgraph "Orchestration Layer"
        B1 --> C1[Kubernetes]
        C1 --> C2[Pod management]
        C1 --> C3[Service networking]
        C1 --> C4[Storage management]
    end
    subgraph "Runtime Layer"
        C1 --> D1[containerd]
        D1 --> D2[runc]
        D2 --> D3[Linux namespaces]
        D2 --> D4[cgroups]
    end
    subgraph "Infrastructure Layer"
        D3 --> E1[Compute resources]
        D4 --> E2[Network resources]
        C4 --> E3[Storage resources]
    end
    subgraph "Monitoring Layer"
        C1 --> F1[Prometheus]
        F1 --> F2[Grafana]
        C1 --> F3[Jaeger]
        C1 --> F4[ELK Stack]
    end
Enterprise-Grade Docker Practices
Multi-Stage Build Optimization
# Dockerfile.multi-stage
# Build stage
FROM golang:1.21-alpine AS builder
# Set the working directory
WORKDIR /app
# Install build dependencies
RUN apk add --no-cache git ca-certificates tzdata
# Copy dependency manifests and pre-download modules (cached unless go.mod/go.sum change)
COPY go.mod go.sum ./
RUN go mod download
# Copy the source code
COPY . .
# Build a fully static binary
RUN CGO_ENABLED=0 GOOS=linux GOARCH=amd64 go build \
    -ldflags='-w -s -extldflags "-static"' \
    -a -installsuffix cgo \
    -o main ./cmd/server
# Runtime stage (named so docker-compose can reference it via `target: production`)
FROM scratch AS production
# Copy only what the binary needs from the build stage
COPY --from=builder /etc/ssl/certs/ca-certificates.crt /etc/ssl/certs/
COPY --from=builder /usr/share/zoneinfo /usr/share/zoneinfo
COPY --from=builder /app/main /main
# Set the timezone
ENV TZ=Asia/Shanghai
# Run as a non-root user (scratch has no /etc/passwd, so use the numeric "nobody" UID/GID)
USER 65534:65534
# Expose the service port
EXPOSE 8080
# Health check (the binary is assumed to provide a `healthcheck` subcommand)
HEALTHCHECK --interval=30s --timeout=3s --start-period=5s --retries=3 \
    CMD ["/main", "healthcheck"]
# Start the application
ENTRYPOINT ["/main"]
Docker Security Best Practices
# Dockerfile.secure
FROM alpine:3.18
# Create an unprivileged user and group
RUN addgroup -g 1001 -S appgroup && \
    adduser -u 1001 -S appuser -G appgroup
# Apply security updates and install only what is needed
RUN apk update && \
    apk upgrade && \
    apk add --no-cache \
        ca-certificates \
        dumb-init && \
    rm -rf /var/cache/apk/*
# Set the working directory
WORKDIR /app
# Copy application files and set ownership and permissions
COPY --chown=appuser:appgroup ./app /app/
RUN chmod +x /app/main
# Switch to the unprivileged user
USER appuser
# Use dumb-init as PID 1 for proper signal handling and zombie reaping
ENTRYPOINT ["dumb-init", "--"]
CMD ["./main"]
# Security-related labels
LABEL security.scan="enabled" \
      security.policy="restricted" \
      maintainer="security@company.com"
Docker Compose Production Configuration
# docker-compose.prod.yml
version: '3.8'
services:
app:
image: myapp:${APP_VERSION:-latest}
build:
context: .
dockerfile: Dockerfile.multi-stage
target: production
restart: unless-stopped
environment:
- NODE_ENV=production
- DATABASE_URL=${DATABASE_URL}
- REDIS_URL=${REDIS_URL}
- JWT_SECRET=${JWT_SECRET}
ports:
- "8080:8080"
volumes:
- app-logs:/app/logs
- app-uploads:/app/uploads
networks:
- app-network
depends_on:
- database
- redis
healthcheck:
      # the scratch-based image has no shell or curl; reuse the binary's healthcheck subcommand
      test: ["CMD", "/main", "healthcheck"]
interval: 30s
timeout: 10s
retries: 3
start_period: 40s
deploy:
resources:
limits:
cpus: '2.0'
memory: 2G
reservations:
cpus: '0.5'
memory: 512M
restart_policy:
condition: on-failure
delay: 5s
max_attempts: 3
window: 120s
logging:
driver: "json-file"
options:
max-size: "10m"
max-file: "3"
security_opt:
- no-new-privileges:true
read_only: true
tmpfs:
- /tmp:noexec,nosuid,size=100m
database:
image: postgres:15-alpine
restart: unless-stopped
environment:
- POSTGRES_DB=${DB_NAME}
- POSTGRES_USER=${DB_USER}
- POSTGRES_PASSWORD=${DB_PASSWORD}
volumes:
- postgres-data:/var/lib/postgresql/data
- ./init.sql:/docker-entrypoint-initdb.d/init.sql:ro
networks:
- app-network
ports:
- "127.0.0.1:5432:5432"
command: >
postgres
-c shared_preload_libraries=pg_stat_statements
-c pg_stat_statements.track=all
-c max_connections=200
-c shared_buffers=256MB
-c effective_cache_size=1GB
-c work_mem=4MB
-c maintenance_work_mem=64MB
healthcheck:
test: ["CMD-SHELL", "pg_isready -U ${DB_USER} -d ${DB_NAME}"]
interval: 30s
timeout: 10s
retries: 5
deploy:
resources:
limits:
cpus: '1.0'
memory: 1G
reservations:
cpus: '0.25'
memory: 256M
redis:
image: redis:7-alpine
restart: unless-stopped
command: >
redis-server
--appendonly yes
--appendfsync everysec
--maxmemory 512mb
--maxmemory-policy allkeys-lru
volumes:
- redis-data:/data
networks:
- app-network
ports:
- "127.0.0.1:6379:6379"
healthcheck:
test: ["CMD", "redis-cli", "ping"]
interval: 30s
timeout: 10s
retries: 3
deploy:
resources:
limits:
cpus: '0.5'
memory: 512M
reservations:
cpus: '0.1'
memory: 128M
nginx:
image: nginx:1.25-alpine
restart: unless-stopped
ports:
- "80:80"
- "443:443"
volumes:
- ./nginx.conf:/etc/nginx/nginx.conf:ro
- ./ssl:/etc/nginx/ssl:ro
- nginx-logs:/var/log/nginx
networks:
- app-network
depends_on:
- app
healthcheck:
test: ["CMD", "wget", "--quiet", "--tries=1", "--spider", "http://localhost/health"]
interval: 30s
timeout: 10s
retries: 3
deploy:
resources:
limits:
cpus: '0.5'
memory: 256M
reservations:
cpus: '0.1'
memory: 64M
volumes:
postgres-data:
driver: local
driver_opts:
type: none
o: bind
device: /data/postgres
redis-data:
driver: local
driver_opts:
type: none
o: bind
device: /data/redis
app-logs:
driver: local
app-uploads:
driver: local
nginx-logs:
driver: local
networks:
app-network:
driver: bridge
ipam:
config:
- subnet: 172.20.0.0/16
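The stack above reads all of its settings from the environment (typically a .env file kept next to the compose file, providing APP_VERSION, DATABASE_URL, and the other referenced variables). For local development, hardening such as the read-only root filesystem usually gets in the way; a minimal sketch of an override file, assuming the same service names:
# docker-compose.override.yml -- local development only (illustrative)
services:
  app:
    environment:
      - NODE_ENV=development
    # relax the production hardening for easier local debugging
    read_only: false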
Kubernetes Cluster Architecture Design
High-Availability Cluster Configuration
# kubeadm-config.yaml
apiVersion: kubeadm.k8s.io/v1beta3
kind: InitConfiguration
localAPIEndpoint:
advertiseAddress: 10.0.1.10
bindPort: 6443
nodeRegistration:
criSocket: unix:///var/run/containerd/containerd.sock
  kubeletExtraArgs:
    # --container-runtime was removed in Kubernetes v1.27; the CRI endpoint is already
    # set via criSocket above and containerRuntimeEndpoint in KubeletConfiguration below
    cloud-provider: external
---
apiVersion: kubeadm.k8s.io/v1beta3
kind: ClusterConfiguration
kubernetesVersion: v1.28.0
clusterName: production-cluster
controlPlaneEndpoint: k8s-api.company.com:6443
apiServer:
certSANs:
- k8s-api.company.com
- 10.0.1.10
- 10.0.1.11
- 10.0.1.12
- 127.0.0.1
extraArgs:
audit-log-maxage: "30"
audit-log-maxbackup: "10"
audit-log-maxsize: "100"
audit-log-path: /var/log/audit.log
audit-policy-file: /etc/kubernetes/audit-policy.yaml
    # PodSecurityPolicy admission was removed in v1.25; Pod Security Admission is built in
    enable-admission-plugins: NodeRestriction,ResourceQuota
encryption-provider-config: /etc/kubernetes/encryption-config.yaml
extraVolumes:
- name: audit-policy
hostPath: /etc/kubernetes/audit-policy.yaml
mountPath: /etc/kubernetes/audit-policy.yaml
readOnly: true
pathType: File
- name: encryption-config
hostPath: /etc/kubernetes/encryption-config.yaml
mountPath: /etc/kubernetes/encryption-config.yaml
readOnly: true
pathType: File
etcd:
local:
dataDir: /var/lib/etcd
extraArgs:
listen-metrics-urls: http://0.0.0.0:2381
auto-compaction-mode: periodic
auto-compaction-retention: "1"
max-request-bytes: "33554432"
quota-backend-bytes: "6442450944"
networking:
serviceSubnet: 10.96.0.0/12
podSubnet: 10.244.0.0/16
dnsDomain: cluster.local
controllerManager:
extraArgs:
bind-address: 0.0.0.0
secure-port: "10257"
cluster-signing-duration: "8760h"
scheduler:
extraArgs:
bind-address: 0.0.0.0
secure-port: "10259"
---
apiVersion: kubeadm.k8s.io/v1beta3
kind: KubeletConfiguration
cgroupDriver: systemd
containerRuntimeEndpoint: unix:///var/run/containerd/containerd.sock
resolvConf: /run/systemd/resolve/resolv.conf
runtimeRequestTimeout: "15m"
tlsCipherSuites:
  # forward-secret ECDHE suites only; the static TLS_RSA_* suites offer no forward secrecy
  - TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256
  - TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256
  - TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305
  - TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384
  - TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305
  - TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384
protectKernelDefaults: true
makeIPTablesUtilChains: true
eventRecordQPS: 0
shutdownGracePeriod: 60s
shutdownGracePeriodCriticalPods: 20s
---
apiVersion: kubeproxy.config.k8s.io/v1alpha1
kind: KubeProxyConfiguration
bindAddress: 0.0.0.0
metricsBindAddress: 0.0.0.0:10249
mode: ipvs
ipvs:
strictARP: true
scheduler: rr
iptables:
masqueradeAll: true
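The apiServer section above references two files that must already exist on every control-plane node before kubeadm init runs. Minimal sketches of both follow; the AES key is a placeholder you would generate yourself (e.g. 32 random bytes, base64-encoded), and the audit rules are deliberately coarse:
# /etc/kubernetes/encryption-config.yaml -- encrypt Secrets at rest in etcd
apiVersion: apiserver.config.k8s.io/v1
kind: EncryptionConfiguration
resources:
  - resources:
      - secrets
    providers:
      - aescbc:
          keys:
            - name: key1
              secret: <base64-encoded 32-byte key>   # placeholder -- generate your own
      - identity: {}   # fallback so pre-existing plaintext data stays readable
---
# /etc/kubernetes/audit-policy.yaml -- a coarse starting policy
apiVersion: audit.k8s.io/v1
kind: Policy
rules:
  # record Secret/ConfigMap access at the metadata level only, to avoid logging payloads
  - level: Metadata
    resources:
      - group: ""
        resources: ["secrets", "configmaps"]
  # don't log read-only requests from the nodes themselves
  - level: None
    userGroups: ["system:nodes"]
    verbs: ["get", "watch", "list"]
  # everything else at the request level
  - level: Request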
Network Plugin Configuration (Calico)
# calico-config.yaml
apiVersion: operator.tigera.io/v1
kind: Installation
metadata:
name: default
spec:
calicoNetwork:
ipPools:
- blockSize: 26
cidr: 10.244.0.0/16
encapsulation: VXLANCrossSubnet
natOutgoing: Enabled
nodeSelector: all()
nodeAddressAutodetectionV4:
interface: "eth0"
mtu: 1440
registry: quay.io/
imagePullSecrets:
- name: tigera-pull-secret
---
apiVersion: projectcalico.org/v3
kind: BGPConfiguration
metadata:
name: default
spec:
logSeverityScreen: Info
nodeToNodeMeshEnabled: true
asNumber: 64512
---
apiVersion: projectcalico.org/v3
kind: IPPool
metadata:
name: default-ipv4-ippool
spec:
cidr: 10.244.0.0/16
  ipipMode: Never
  vxlanMode: CrossSubnet   # keep consistent with the VXLANCrossSubnet encapsulation above
natOutgoing: true
disabled: false
nodeSelector: all()
---
apiVersion: projectcalico.org/v3
kind: NetworkPolicy
metadata:
name: default-deny-all
namespace: default
spec:
selector: all()
types:
- Ingress
- Egress
---
apiVersion: projectcalico.org/v3
kind: NetworkPolicy
metadata:
name: allow-dns
namespace: default
spec:
selector: all()
types:
- Egress
egress:
- action: Allow
protocol: UDP
destination:
selector: k8s-app == "kube-dns"
ports:
- 53
- action: Allow
protocol: TCP
destination:
selector: k8s-app == "kube-dns"
ports:
- 53
StorageClass Configuration
# storage-classes.yaml
apiVersion: storage.k8s.io/v1
kind: StorageClass
metadata:
name: fast-ssd
annotations:
storageclass.kubernetes.io/is-default-class: "true"
# gp3/io2 and the throughput parameter require the EBS CSI driver; the deprecated
# in-tree kubernetes.io/aws-ebs provisioner does not support them
provisioner: ebs.csi.aws.com
parameters:
type: gp3
iops: "3000"
throughput: "125"
encrypted: "true"
kmsKeyId: arn:aws:kms:us-west-2:123456789012:key/12345678-1234-1234-1234-123456789012
volumeBindingMode: WaitForFirstConsumer
allowVolumeExpansion: true
reclaimPolicy: Delete
---
apiVersion: storage.k8s.io/v1
kind: StorageClass
metadata:
name: standard-hdd
provisioner: ebs.csi.aws.com
parameters:
type: gp2
encrypted: "true"
volumeBindingMode: WaitForFirstConsumer
allowVolumeExpansion: true
reclaimPolicy: Delete
---
apiVersion: storage.k8s.io/v1
kind: StorageClass
metadata:
name: high-iops-ssd
provisioner: ebs.csi.aws.com
parameters:
type: io2
iops: "10000"
encrypted: "true"
volumeBindingMode: WaitForFirstConsumer
allowVolumeExpansion: true
reclaimPolicy: Retain
---
# Local storage class
apiVersion: storage.k8s.io/v1
kind: StorageClass
metadata:
name: local-storage
provisioner: kubernetes.io/no-provisioner
volumeBindingMode: WaitForFirstConsumer
reclaimPolicy: Delete
---
apiVersion: v1
kind: PersistentVolume
metadata:
name: local-pv-1
spec:
capacity:
storage: 100Gi
volumeMode: Filesystem
accessModes:
- ReadWriteOnce
persistentVolumeReclaimPolicy: Delete
storageClassName: local-storage
local:
path: /mnt/disks/ssd1
nodeAffinity:
required:
nodeSelectorTerms:
- matchExpressions:
- key: kubernetes.io/hostname
operator: In
values:
- worker-node-1
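Workloads consume these classes through a PersistentVolumeClaim; with WaitForFirstConsumer, provisioning waits until a pod is scheduled, so the EBS volume lands in the right availability zone. A minimal claim against the fast-ssd class (names are illustrative):
# pvc-example.yaml
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: orders-db-data
  namespace: production
spec:
  accessModes:
    - ReadWriteOnce
  storageClassName: fast-ssd
  resources:
    requests:
      storage: 50Gi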
Application Deployment and Management
Microservice Deployment Template
# microservice-template.yaml
apiVersion: apps/v1
kind: Deployment
metadata:
name: user-service
namespace: production
labels:
app: user-service
version: v1.2.3
component: backend
spec:
replicas: 3
strategy:
type: RollingUpdate
rollingUpdate:
maxSurge: 1
maxUnavailable: 0
selector:
matchLabels:
app: user-service
template:
metadata:
labels:
app: user-service
version: v1.2.3
component: backend
annotations:
prometheus.io/scrape: "true"
prometheus.io/port: "8080"
prometheus.io/path: "/metrics"
spec:
serviceAccountName: user-service
securityContext:
runAsNonRoot: true
runAsUser: 1001
fsGroup: 1001
containers:
- name: user-service
image: myregistry.com/user-service:v1.2.3
imagePullPolicy: Always
ports:
- containerPort: 8080
name: http
protocol: TCP
- containerPort: 8081
name: grpc
protocol: TCP
env:
- name: DATABASE_URL
valueFrom:
secretKeyRef:
name: user-service-secrets
key: database-url
- name: REDIS_URL
valueFrom:
configMapKeyRef:
name: user-service-config
key: redis-url
- name: LOG_LEVEL
value: "info"
- name: JAEGER_AGENT_HOST
valueFrom:
fieldRef:
fieldPath: status.hostIP
resources:
requests:
memory: "256Mi"
cpu: "250m"
limits:
memory: "512Mi"
cpu: "500m"
livenessProbe:
httpGet:
path: /health
port: 8080
initialDelaySeconds: 30
periodSeconds: 10
timeoutSeconds: 5
failureThreshold: 3
readinessProbe:
httpGet:
path: /ready
port: 8080
initialDelaySeconds: 5
periodSeconds: 5
timeoutSeconds: 3
failureThreshold: 3
startupProbe:
httpGet:
path: /startup
port: 8080
initialDelaySeconds: 10
periodSeconds: 10
timeoutSeconds: 5
failureThreshold: 30
volumeMounts:
- name: config
mountPath: /app/config
readOnly: true
- name: secrets
mountPath: /app/secrets
readOnly: true
- name: tmp
mountPath: /tmp
securityContext:
allowPrivilegeEscalation: false
readOnlyRootFilesystem: true
capabilities:
drop:
- ALL
volumes:
- name: config
configMap:
name: user-service-config
- name: secrets
secret:
secretName: user-service-secrets
- name: tmp
emptyDir: {}
imagePullSecrets:
- name: registry-secret
nodeSelector:
node-type: application
tolerations:
- key: "application"
operator: "Equal"
value: "true"
effect: "NoSchedule"
affinity:
podAntiAffinity:
preferredDuringSchedulingIgnoredDuringExecution:
- weight: 100
podAffinityTerm:
labelSelector:
matchExpressions:
- key: app
operator: In
values:
- user-service
topologyKey: kubernetes.io/hostname
---
apiVersion: v1
kind: Service
metadata:
name: user-service
namespace: production
labels:
app: user-service
annotations:
service.beta.kubernetes.io/aws-load-balancer-type: nlb
service.beta.kubernetes.io/aws-load-balancer-internal: "true"
spec:
type: LoadBalancer
ports:
- port: 80
targetPort: 8080
protocol: TCP
name: http
- port: 8081
targetPort: 8081
protocol: TCP
name: grpc
selector:
app: user-service
---
apiVersion: v1
kind: ConfigMap
metadata:
name: user-service-config
namespace: production
data:
redis-url: "redis://redis-cluster:6379"
log-format: "json"
metrics-enabled: "true"
tracing-enabled: "true"
app.yaml: |
server:
port: 8080
timeout: 30s
database:
max_connections: 100
idle_timeout: 300s
cache:
ttl: 3600s
max_size: 1000
---
apiVersion: v1
kind: Secret
metadata:
name: user-service-secrets
namespace: production
type: Opaque
data:
database-url: cG9zdGdyZXNxbDovL3VzZXI6cGFzc3dvcmRAZGI6NTQzMi9kYg==
jwt-secret: bXlfc3VwZXJfc2VjcmV0X2tleQ==
api-key: YWJjZGVmZ2hpams=
---
apiVersion: v1
kind: ServiceAccount
metadata:
name: user-service
namespace: production
annotations:
eks.amazonaws.com/role-arn: arn:aws:iam::123456789012:role/user-service-role
---
apiVersion: networking.k8s.io/v1
kind: NetworkPolicy
metadata:
name: user-service-netpol
namespace: production
spec:
podSelector:
matchLabels:
app: user-service
policyTypes:
- Ingress
- Egress
ingress:
- from:
- namespaceSelector:
matchLabels:
name: production
- podSelector:
matchLabels:
component: frontend
ports:
- protocol: TCP
port: 8080
egress:
- to:
- namespaceSelector:
matchLabels:
name: production
- podSelector:
matchLabels:
app: database
ports:
- protocol: TCP
port: 5432
- to:
- namespaceSelector:
matchLabels:
name: production
- podSelector:
matchLabels:
app: redis
ports:
- protocol: TCP
port: 6379
- to: []
ports:
- protocol: UDP
port: 53
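The Deployment above pins replicas at 3 and rolls updates with zero unavailable pods; in practice you would usually pair it with a HorizontalPodAutoscaler and a PodDisruptionBudget so that load spikes and node drains both respect availability. A sketch (thresholds are illustrative):
# user-service-scaling.yaml
apiVersion: autoscaling/v2
kind: HorizontalPodAutoscaler
metadata:
  name: user-service
  namespace: production
spec:
  scaleTargetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: user-service
  minReplicas: 3
  maxReplicas: 10
  metrics:
    - type: Resource
      resource:
        name: cpu
        target:
          type: Utilization
          averageUtilization: 70
---
apiVersion: policy/v1
kind: PodDisruptionBudget
metadata:
  name: user-service
  namespace: production
spec:
  minAvailable: 2   # never drain below two ready replicas
  selector:
    matchLabels:
      app: user-service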
Ingress Configuration
# ingress-nginx.yaml
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
name: app-ingress
namespace: production
annotations:
nginx.ingress.kubernetes.io/ssl-redirect: "true"
nginx.ingress.kubernetes.io/force-ssl-redirect: "true"
nginx.ingress.kubernetes.io/proxy-body-size: "50m"
nginx.ingress.kubernetes.io/proxy-connect-timeout: "60"
nginx.ingress.kubernetes.io/proxy-send-timeout: "60"
nginx.ingress.kubernetes.io/proxy-read-timeout: "60"
    # ingress-nginx has no "rate-limit" annotation; limit-rpm caps requests per minute
    nginx.ingress.kubernetes.io/limit-rpm: "100"
nginx.ingress.kubernetes.io/enable-cors: "true"
nginx.ingress.kubernetes.io/cors-allow-origin: "https://app.company.com"
nginx.ingress.kubernetes.io/cors-allow-methods: "GET, POST, PUT, DELETE, OPTIONS"
nginx.ingress.kubernetes.io/cors-allow-headers: "DNT,X-CustomHeader,Keep-Alive,User-Agent,X-Requested-With,If-Modified-Since,Cache-Control,Content-Type,Authorization"
cert-manager.io/cluster-issuer: "letsencrypt-prod"
nginx.ingress.kubernetes.io/configuration-snippet: |
more_set_headers "X-Frame-Options: DENY";
more_set_headers "X-Content-Type-Options: nosniff";
more_set_headers "X-XSS-Protection: 1; mode=block";
more_set_headers "Strict-Transport-Security: max-age=31536000; includeSubDomains";
spec:
  ingressClassName: nginx   # replaces the deprecated kubernetes.io/ingress.class annotation
tls:
- hosts:
- api.company.com
- app.company.com
secretName: app-tls-secret
rules:
- host: api.company.com
http:
paths:
- path: /api/v1/users
pathType: Prefix
backend:
service:
name: user-service
port:
number: 80
- path: /api/v1/orders
pathType: Prefix
backend:
service:
name: order-service
port:
number: 80
- path: /api/v1/payments
pathType: Prefix
backend:
service:
name: payment-service
port:
number: 80
- host: app.company.com
http:
paths:
- path: /
pathType: Prefix
backend:
service:
name: frontend-service
port:
number: 80
---
apiVersion: networking.k8s.io/v1
kind: IngressClass
metadata:
name: nginx
annotations:
ingressclass.kubernetes.io/is-default-class: "true"
spec:
controller: k8s.io/ingress-nginx
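The cert-manager.io/cluster-issuer annotation above assumes a ClusterIssuer named letsencrypt-prod already exists in the cluster. A minimal sketch (the contact email is a placeholder):
# letsencrypt-issuer.yaml
apiVersion: cert-manager.io/v1
kind: ClusterIssuer
metadata:
  name: letsencrypt-prod
spec:
  acme:
    server: https://acme-v02.api.letsencrypt.org/directory
    email: platform@company.com   # placeholder contact address
    privateKeySecretRef:
      name: letsencrypt-prod-account-key
    solvers:
      - http01:
          ingress:
            class: nginx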
Security Hardening and Policies
Pod Security Standards
Note that PodSecurityPolicy was removed in Kubernetes v1.25, so on the v1.28 cluster configured above the namespace-level Pod Security Admission labels further below are the supported enforcement mechanism; the PSP manifest and its RBAC binding are retained for reference only, for clusters still running v1.24 or earlier.
# pod-security-policy.yaml
apiVersion: policy/v1beta1
kind: PodSecurityPolicy
metadata:
name: restricted
spec:
privileged: false
allowPrivilegeEscalation: false
requiredDropCapabilities:
- ALL
volumes:
- 'configMap'
- 'emptyDir'
- 'projected'
- 'secret'
- 'downwardAPI'
- 'persistentVolumeClaim'
runAsUser:
rule: 'MustRunAsNonRoot'
seLinux:
rule: 'RunAsAny'
fsGroup:
rule: 'RunAsAny'
readOnlyRootFilesystem: true
---
apiVersion: v1
kind: Namespace
metadata:
name: production
labels:
pod-security.kubernetes.io/enforce: restricted
pod-security.kubernetes.io/audit: restricted
pod-security.kubernetes.io/warn: restricted
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
name: psp-restricted
rules:
- apiGroups: ['policy']
resources: ['podsecuritypolicies']
verbs: ['use']
resourceNames:
- restricted
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
name: psp-restricted
roleRef:
kind: ClusterRole
name: psp-restricted
apiGroup: rbac.authorization.k8s.io
subjects:
- kind: ServiceAccount
name: default
namespace: production
OPA Gatekeeper Policies
# gatekeeper-policies.yaml
apiVersion: templates.gatekeeper.sh/v1beta1
kind: ConstraintTemplate
metadata:
name: k8srequiredlabels
spec:
crd:
spec:
names:
kind: K8sRequiredLabels
validation:
openAPIV3Schema:
type: object
properties:
labels:
type: array
items:
type: string
targets:
- target: admission.k8s.gatekeeper.sh
rego: |
package k8srequiredlabels
violation[{"msg": msg}] {
required := input.parameters.labels
provided := input.review.object.metadata.labels
missing := required[_]
not provided[missing]
msg := sprintf("Missing required label: %v", [missing])
}
---
apiVersion: constraints.gatekeeper.sh/v1beta1
kind: K8sRequiredLabels
metadata:
name: must-have-app-label
spec:
match:
kinds:
- apiGroups: ["apps"]
kinds: ["Deployment", "StatefulSet", "DaemonSet"]
namespaces: ["production"]
parameters:
labels: ["app", "version", "component"]
---
apiVersion: templates.gatekeeper.sh/v1beta1
kind: ConstraintTemplate
metadata:
name: k8scontainerresources
spec:
crd:
spec:
names:
kind: K8sContainerResources
validation:
openAPIV3Schema:
type: object
properties:
cpu:
type: string
memory:
type: string
targets:
- target: admission.k8s.gatekeeper.sh
rego: |
package k8scontainerresources
violation[{"msg": msg}] {
container := input.review.object.spec.template.spec.containers[_]
not container.resources.requests.cpu
msg := "Container must specify CPU requests"
}
violation[{"msg": msg}] {
container := input.review.object.spec.template.spec.containers[_]
not container.resources.requests.memory
msg := "Container must specify memory requests"
}
violation[{"msg": msg}] {
container := input.review.object.spec.template.spec.containers[_]
not container.resources.limits.cpu
msg := "Container must specify CPU limits"
}
violation[{"msg": msg}] {
container := input.review.object.spec.template.spec.containers[_]
not container.resources.limits.memory
msg := "Container must specify memory limits"
}
---
apiVersion: constraints.gatekeeper.sh/v1beta1
kind: K8sContainerResources
metadata:
name: must-have-resources
spec:
match:
kinds:
- apiGroups: ["apps"]
kinds: ["Deployment", "StatefulSet", "DaemonSet"]
namespaces: ["production"]
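System namespaces usually need to be exempted from such constraints so that cluster components are not blocked. Gatekeeper's Config resource supports this; a sketch, assuming Gatekeeper is installed in the default gatekeeper-system namespace:
# gatekeeper-config.yaml
apiVersion: config.gatekeeper.sh/v1alpha1
kind: Config
metadata:
  name: config
  namespace: gatekeeper-system
spec:
  match:
    - excludedNamespaces: ["kube-system", "gatekeeper-system"]
      processes: ["*"]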
CI/CD Integration
GitLab CI Containerized Pipeline
# .gitlab-ci.yml
stages:
- build
- test
- security
- deploy-staging
- deploy-production
variables:
DOCKER_DRIVER: overlay2
DOCKER_TLS_CERTDIR: "/certs"
REGISTRY: registry.company.com
IMAGE_NAME: $REGISTRY/$CI_PROJECT_PATH
KUBECONFIG: /tmp/kubeconfig
build:
  stage: build
  image: docker:20.10.16
  services:
    - docker:20.10.16-dind
  # log in here rather than in a global before_script: the trivy and kubectl
  # images used by later stages do not contain the docker CLI
  before_script:
    - echo $CI_REGISTRY_PASSWORD | docker login -u $CI_REGISTRY_USER --password-stdin $CI_REGISTRY
  script:
- docker build --build-arg BUILD_DATE=$(date -u +'%Y-%m-%dT%H:%M:%SZ')
--build-arg VCS_REF=$CI_COMMIT_SHA
--build-arg VERSION=$CI_COMMIT_TAG
-t $IMAGE_NAME:$CI_COMMIT_SHA
-t $IMAGE_NAME:latest .
- docker push $IMAGE_NAME:$CI_COMMIT_SHA
- docker push $IMAGE_NAME:latest
only:
- main
- develop
- tags
test:
  stage: test
  # the scratch-based app image has no shell or Go toolchain, so run tests in a Go image
  # (the race detector needs cgo, hence the Debian-based image rather than alpine)
  image: golang:1.21
  script:
    - go test -v -race -coverprofile=coverage.out ./...
    - go tool cover -html=coverage.out -o coverage.html
    # convert the Go profile to Cobertura XML for GitLab's coverage report below
    - go run github.com/boombuler/gocover-cobertura@latest < coverage.out > coverage.xml
artifacts:
reports:
coverage_report:
coverage_format: cobertura
path: coverage.xml
paths:
- coverage.html
coverage: '/coverage: \d+\.\d+% of statements/'
only:
- main
- develop
- merge_requests
security-scan:
  stage: security
  image: aquasec/trivy:latest
  variables:
    # trivy pulls the image straight from the registry, so pass credentials via its env vars
    TRIVY_USERNAME: $CI_REGISTRY_USER
    TRIVY_PASSWORD: $CI_REGISTRY_PASSWORD
  script:
    - trivy image --exit-code 0 --severity HIGH,CRITICAL --format template --template "@contrib/sarif.tpl" -o trivy-results.sarif $IMAGE_NAME:$CI_COMMIT_SHA
    - trivy image --exit-code 1 --severity CRITICAL $IMAGE_NAME:$CI_COMMIT_SHA
artifacts:
reports:
sast: trivy-results.sarif
only:
- main
- develop
- tags
deploy-staging:
stage: deploy-staging
image: bitnami/kubectl:latest
environment:
name: staging
url: https://staging.company.com
script:
- echo $KUBE_CONFIG_STAGING | base64 -d > $KUBECONFIG
- kubectl config use-context staging
- envsubst < k8s/deployment.yaml | kubectl apply -f -
- kubectl rollout status deployment/app -n staging --timeout=300s
- kubectl get pods -n staging -l app=myapp
variables:
NAMESPACE: staging
REPLICAS: 2
IMAGE_TAG: $CI_COMMIT_SHA
only:
- develop
deploy-production:
stage: deploy-production
image: bitnami/kubectl:latest
environment:
name: production
url: https://app.company.com
script:
- echo $KUBE_CONFIG_PRODUCTION | base64 -d > $KUBECONFIG
- kubectl config use-context production
- envsubst < k8s/deployment.yaml | kubectl apply -f -
- kubectl rollout status deployment/app -n production --timeout=600s
- kubectl get pods -n production -l app=myapp
variables:
NAMESPACE: production
REPLICAS: 5
IMAGE_TAG: $CI_COMMIT_SHA
when: manual
only:
- main
- tags
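Both deploy jobs render k8s/deployment.yaml with envsubst before applying it, so the manifest is written with shell-style placeholders for the job variables. A trimmed sketch of what that template might look like (illustrative, matching the NAMESPACE/REPLICAS/IMAGE_TAG variables above):
# k8s/deployment.yaml -- rendered by envsubst in the deploy jobs
apiVersion: apps/v1
kind: Deployment
metadata:
  name: app
  namespace: ${NAMESPACE}
  labels:
    app: myapp
spec:
  replicas: ${REPLICAS}
  selector:
    matchLabels:
      app: myapp
  template:
    metadata:
      labels:
        app: myapp
    spec:
      containers:
        - name: app
          image: ${IMAGE_NAME}:${IMAGE_TAG}
          ports:
            - containerPort: 8080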
Helm Chart Template
# Chart.yaml
apiVersion: v2
name: microservice
description: A Helm chart for microservice deployment
type: application
version: 0.1.0
appVersion: "1.0.0"
dependencies:
- name: postgresql
version: 11.9.13
repository: https://charts.bitnami.com/bitnami
condition: postgresql.enabled
- name: redis
version: 17.3.7
repository: https://charts.bitnami.com/bitnami
condition: redis.enabled
---
# values.yaml
replicaCount: 3
image:
repository: myregistry.com/myapp
pullPolicy: IfNotPresent
tag: ""
imagePullSecrets:
- name: registry-secret
nameOverride: ""
fullnameOverride: ""
serviceAccount:
create: true
annotations: {}
name: ""
podAnnotations:
prometheus.io/scrape: "true"
prometheus.io/port: "8080"
prometheus.io/path: "/metrics"
podSecurityContext:
fsGroup: 1001
runAsNonRoot: true
runAsUser: 1001
securityContext:
allowPrivilegeEscalation: false
readOnlyRootFilesystem: true
capabilities:
drop:
- ALL
service:
type: ClusterIP
port: 80
targetPort: 8080
ingress:
enabled: true
className: "nginx"
annotations:
cert-manager.io/cluster-issuer: letsencrypt-prod
nginx.ingress.kubernetes.io/ssl-redirect: "true"
hosts:
- host: api.company.com
paths:
- path: /api/v1
pathType: Prefix
tls:
- secretName: api-tls
hosts:
- api.company.com
resources:
limits:
cpu: 500m
memory: 512Mi
requests:
cpu: 250m
memory: 256Mi
autoscaling:
enabled: true
minReplicas: 3
maxReplicas: 10
targetCPUUtilizationPercentage: 70
targetMemoryUtilizationPercentage: 80
nodeSelector: {}
tolerations: []
affinity:
podAntiAffinity:
preferredDuringSchedulingIgnoredDuringExecution:
- weight: 100
podAffinityTerm:
labelSelector:
matchExpressions:
- key: app.kubernetes.io/name
operator: In
values:
- microservice
topologyKey: kubernetes.io/hostname
postgresql:
enabled: true
auth:
postgresPassword: "changeme"
database: "myapp"
primary:
persistence:
enabled: true
size: 20Gi
redis:
enabled: true
auth:
enabled: false
master:
persistence:
enabled: true
size: 8Gi
config:
logLevel: info
database:
maxConnections: 100
cache:
ttl: 3600
secrets:
databaseUrl: ""
jwtSecret: ""
apiKey: ""
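These values are consumed by the chart's templates. As one example, the autoscaling block would typically drive a conditional HPA template like the sketch below; it assumes the usual fullname helper that `helm create` generates in _helpers.tpl:
# templates/hpa.yaml
{{- if .Values.autoscaling.enabled }}
apiVersion: autoscaling/v2
kind: HorizontalPodAutoscaler
metadata:
  name: {{ include "microservice.fullname" . }}
spec:
  scaleTargetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: {{ include "microservice.fullname" . }}
  minReplicas: {{ .Values.autoscaling.minReplicas }}
  maxReplicas: {{ .Values.autoscaling.maxReplicas }}
  metrics:
    - type: Resource
      resource:
        name: cpu
        target:
          type: Utilization
          averageUtilization: {{ .Values.autoscaling.targetCPUUtilizationPercentage }}
    - type: Resource
      resource:
        name: memory
        target:
          type: Utilization
          averageUtilization: {{ .Values.autoscaling.targetMemoryUtilizationPercentage }}
{{- end }}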
Summary
Container orchestration and management sit at the heart of modern cloud-native applications. With Docker's standardized packaging, Kubernetes' powerful orchestration, and well-integrated security policies and CI/CD pipelines, an organization can build an efficient, reliable, and secure container platform.
In practice, a gradual migration strategy works best: start with simple stateless applications and expand step by step toward complex microservice architectures. Equally important is investing in the operational foundations of monitoring, logging, and security, so that the platform runs stably and keeps improving over time.