Redis集群架构设计与实践:从单机到分布式的完整方案
Redis作为高性能的内存数据库,在现代互联网架构中扮演着重要角色。随着业务规模的增长,单机Redis往往无法满足高并发、大容量的需求。本文将深入探讨Redis集群架构的设计原理和实践方案,从基础的主从复制到复杂的分布式集群部署。
Redis集群架构概述
Redis集群模式对比
| 模式 | 特点 | 适用场景 | 优点 | 缺点 |
|---|---|---|---|---|
| 单机模式 | 单个Redis实例 | 开发测试、小型应用 | 简单易用、性能高 | 无高可用、容量受限 |
| 主从复制 | 一主多从,读写分离 | 读多写少场景 | 读性能扩展、数据备份 | 主节点单点故障 |
| 哨兵模式 | 主从+自动故障转移 | 中等规模应用 | 自动故障转移、高可用 | 写性能无法扩展 |
| Cluster集群 | 分布式集群 | 大规模应用 | 水平扩展、高可用 | 复杂度高、部分命令限制 |
| 代理模式 | 通过代理分片 | 平滑迁移场景 | 对客户端透明 | 代理成为瓶颈 |
Redis集群架构演进
graph TB
subgraph "单机模式"
A[Redis Server]
A1[Client]
A1 --> A
end
subgraph "主从复制模式"
B[Master]
B1[Slave 1]
B2[Slave 2]
B3[Client]
B --> B1
B --> B2
B3 --> B
B3 --> B1
B3 --> B2
end
subgraph "哨兵模式"
C[Master]
C1[Slave 1]
C2[Slave 2]
C3[Sentinel 1]
C4[Sentinel 2]
C5[Sentinel 3]
C6[Client]
C --> C1
C --> C2
C3 --> C
C4 --> C
C5 --> C
C6 --> C3
end
subgraph "Cluster集群模式"
D[Node 1<br/>Master]
D1[Node 2<br/>Master]
D2[Node 3<br/>Master]
D3[Node 4<br/>Slave]
D4[Node 5<br/>Slave]
D5[Node 6<br/>Slave]
D6[Client]
D --> D3
D1 --> D4
D2 --> D5
D6 --> D
D6 --> D1
D6 --> D2
end
主从复制配置与优化
主从复制原理
Redis主从复制采用异步复制机制,包含以下关键步骤:
- 全量同步:从节点首次连接主节点时进行
- 增量同步:主节点将写命令异步发送给从节点
- 心跳检测:定期检查主从连接状态
主节点配置
# redis-master.conf
# Master-node configuration for a Redis master/replica deployment.
# Basic settings
bind 0.0.0.0
port 6379
daemonize yes
pidfile /var/run/redis/redis-master.pid
logfile /var/log/redis/redis-master.log
dir /var/lib/redis/master
# Memory settings
maxmemory 2gb
maxmemory-policy allkeys-lru
# RDB persistence: snapshot after 1 change/900s, 10 changes/300s, 10000 changes/60s
save 900 1
save 300 10
save 60 10000
rdbcompression yes
rdbchecksum yes
dbfilename dump.rdb
# AOF persistence
appendonly yes
appendfilename "appendonly.aof"
appendfsync everysec
no-appendfsync-on-rewrite no
auto-aof-rewrite-percentage 100
auto-aof-rewrite-min-size 64mb
# Replication settings
repl-diskless-sync no
repl-diskless-sync-delay 5
repl-ping-slave-period 10
repl-timeout 60
repl-disable-tcp-nodelay no
repl-backlog-size 1mb
repl-backlog-ttl 3600
# Security (NOTE: replace the placeholder password before deploying)
requirepass your_password
masterauth your_password
# Client settings
timeout 300
tcp-keepalive 300
tcp-backlog 511
maxclients 10000
# Slow log: record commands slower than 10000 microseconds, keep last 128
slowlog-log-slower-than 10000
slowlog-max-len 128
# Memory-encoding optimizations for small collections
hash-max-ziplist-entries 512
hash-max-ziplist-value 64
list-max-ziplist-size -2
list-compress-depth 0
set-max-intset-entries 512
zset-max-ziplist-entries 128
zset-max-ziplist-value 64
从节点配置
# redis-slave.conf
# Replica-node configuration for a Redis master/replica deployment.
# Basic settings
bind 0.0.0.0
port 6379
daemonize yes
pidfile /var/run/redis/redis-slave.pid
logfile /var/log/redis/redis-slave.log
dir /var/lib/redis/slave
# Replication settings: follow the master; serve reads only
slaveof 192.168.1.10 6379
masterauth your_password
slave-serve-stale-data yes
slave-read-only yes
slave-priority 100
# Memory settings
maxmemory 2gb
maxmemory-policy allkeys-lru
# Persistence disabled on the replica to save I/O (the master persists)
save ""
appendonly no
# Security (NOTE: replace the placeholder password before deploying)
requirepass your_password
# Client settings
timeout 300
tcp-keepalive 300
maxclients 10000
# Slow log: record commands slower than 10000 microseconds, keep last 128
slowlog-log-slower-than 10000
slowlog-max-len 128
主从复制监控脚本
#!/bin/bash
# scripts/redis_replication_monitor.sh
# Periodically checks a Redis master and its slaves (connectivity, role,
# link status, replication lag) and raises alerts on anomalies.
# All scalar parameters below can be overridden via environment variables.
set -euo pipefail
# Configuration parameters
MASTER_HOST="${MASTER_HOST:-192.168.1.10}"
MASTER_PORT="${MASTER_PORT:-6379}"
MASTER_AUTH="${MASTER_AUTH:-your_password}"
# Slave nodes to monitor, as "host:port" entries.
SLAVES=("192.168.1.11:6379" "192.168.1.12:6379")
# Seconds between monitoring rounds.
CHECK_INTERVAL="${CHECK_INTERVAL:-30}"
# Replication lag (bytes) above which an alert is sent.
ALERT_THRESHOLD="${ALERT_THRESHOLD:-10}"
# Emit a timestamped log line on stderr (stdout is reserved for data).
log() {
  local msg="$1"
  local ts
  ts=$(date '+%Y-%m-%d %H:%M:%S')
  echo "[${ts}] ${msg}" >&2
}
# Probe a Redis instance with PING; succeed iff it answers.
# Arguments: $1 - host, $2 - port, $3 - auth password
check_redis_connection() {
  local host="$1" port="$2" auth="$3"
  if ! redis-cli -h "${host}" -p "${port}" -a "${auth}" ping > /dev/null 2>&1; then
    return 1
  fi
  return 0
}
# Compute the replication lag (in bytes) between the master and one slave.
# Globals:   MASTER_HOST, MASTER_PORT, MASTER_AUTH (read)
# Arguments: $1 - slave host, $2 - slave port, $3 - slave auth password
# Outputs:   lag in bytes on stdout, or -1 when either offset is unavailable
get_replication_lag() {
  local slave_host="$1"
  local slave_port="$2"
  local auth="$3"
  # FIX: the trailing '|| true' keeps a failed redis-cli call or a missing
  # field (grep exits non-zero, pipefail propagates it) from killing the
  # whole monitor under 'set -euo pipefail'; we fall through to -1 instead.
  local slave_offset
  slave_offset=$(redis-cli -h "${slave_host}" -p "${slave_port}" -a "${auth}" \
    info replication | grep "slave_repl_offset" | cut -d: -f2 | tr -d '\r' || true)
  local master_offset
  master_offset=$(redis-cli -h "${MASTER_HOST}" -p "${MASTER_PORT}" -a "${MASTER_AUTH}" \
    info replication | grep "master_repl_offset" | cut -d: -f2 | tr -d '\r' || true)
  # Only report a lag when both offsets are numeric; otherwise signal -1.
  if [[ "${slave_offset}" =~ ^[0-9]+$ && "${master_offset}" =~ ^[0-9]+$ ]]; then
    echo $((master_offset - slave_offset))
  else
    echo "-1"
  fi
}
# Check the master node: connectivity, role, and connected-slave count.
# Globals:   MASTER_HOST, MASTER_PORT, MASTER_AUTH (read)
# Returns:   0 when healthy, 1 on failure (after sending an alert)
# NOTE(review): under 'set -e', a redis-cli failure in the assignment below
# (after the ping check passed) would abort the whole script — confirm this
# is acceptable or guard it like get_replication_lag.
check_master_status() {
log "检查主节点状态: ${MASTER_HOST}:${MASTER_PORT}"
if ! check_redis_connection "${MASTER_HOST}" "${MASTER_PORT}" "${MASTER_AUTH}"; then
log "ERROR: 主节点连接失败"
send_alert "Redis主节点连接失败: ${MASTER_HOST}:${MASTER_PORT}"
return 1
fi
# Pull the replication section once and parse fields from it.
local master_info
master_info=$(redis-cli -h "${MASTER_HOST}" -p "${MASTER_PORT}" -a "${MASTER_AUTH}" info replication)
local role
role=$(echo "${master_info}" | grep "role:" | cut -d: -f2 | tr -d '\r')
if [[ "${role}" != "master" ]]; then
log "ERROR: 节点角色不是master,当前角色: ${role}"
send_alert "Redis节点角色异常: ${MASTER_HOST}:${MASTER_PORT} 角色为 ${role}"
return 1
fi
# Report how many replicas the master currently sees.
local connected_slaves
connected_slaves=$(echo "${master_info}" | grep "connected_slaves:" | cut -d: -f2 | tr -d '\r')
log "主节点状态正常,连接的从节点数: ${connected_slaves}"
return 0
}
# Check one slave: connectivity, role, master link status, and replication lag.
# Globals:   MASTER_AUTH, ALERT_THRESHOLD (read)
# Arguments: $1 - slave address as "host:port"
# Returns:   0 when healthy (lag problems only warn), 1 on hard failure
# NOTE(review): slaves are assumed to share the master's password
# (MASTER_AUTH is reused for them) — confirm this matches the deployment.
check_slave_status() {
local slave_addr="$1"
local slave_host
local slave_port
IFS=':' read -r slave_host slave_port <<< "${slave_addr}"
log "检查从节点状态: ${slave_host}:${slave_port}"
if ! check_redis_connection "${slave_host}" "${slave_port}" "${MASTER_AUTH}"; then
log "ERROR: 从节点连接失败: ${slave_host}:${slave_port}"
send_alert "Redis从节点连接失败: ${slave_host}:${slave_port}"
return 1
fi
# Pull the replication section once and parse fields from it.
local slave_info
slave_info=$(redis-cli -h "${slave_host}" -p "${slave_port}" -a "${MASTER_AUTH}" info replication)
local role
role=$(echo "${slave_info}" | grep "role:" | cut -d: -f2 | tr -d '\r')
if [[ "${role}" != "slave" ]]; then
log "ERROR: 节点角色不是slave,当前角色: ${role}"
send_alert "Redis节点角色异常: ${slave_host}:${slave_port} 角色为 ${role}"
return 1
fi
# The slave itself reports whether its link to the master is up.
local master_link_status
master_link_status=$(echo "${slave_info}" | grep "master_link_status:" | cut -d: -f2 | tr -d '\r')
if [[ "${master_link_status}" != "up" ]]; then
log "ERROR: 从节点与主节点连接异常: ${master_link_status}"
send_alert "Redis从节点连接异常: ${slave_host}:${slave_port} 状态为 ${master_link_status}"
return 1
fi
# Replication lag in bytes; -1 means it could not be determined.
local lag
lag=$(get_replication_lag "${slave_host}" "${slave_port}" "${MASTER_AUTH}")
if [[ "${lag}" -eq -1 ]]; then
log "WARNING: 无法获取复制延迟信息"
elif [[ "${lag}" -gt "${ALERT_THRESHOLD}" ]]; then
log "WARNING: 复制延迟过高: ${lag} bytes"
send_alert "Redis复制延迟过高: ${slave_host}:${slave_port} 延迟 ${lag} bytes"
else
log "从节点状态正常,复制延迟: ${lag} bytes"
fi
return 0
}
# Deliver an alert via Slack webhook and/or local mail, then log it.
# Both channels are best-effort: delivery failures never fail the caller.
# Globals:   SLACK_WEBHOOK_URL, ALERT_EMAIL (read, optional)
# Arguments: $1 - alert text
send_alert() {
  local alert_text="$1"
  # Slack delivery, only when a webhook URL is configured.
  if [[ -n "${SLACK_WEBHOOK_URL:-}" ]]; then
    curl -X POST "${SLACK_WEBHOOK_URL}" \
      -H 'Content-type: application/json' \
      -d "{\"text\":\"🚨 Redis Alert: ${alert_text}\"}" || true
  fi
  # Mail delivery, only when a 'mail' binary is available.
  if command -v mail > /dev/null 2>&1; then
    echo "${alert_text}" | mail -s "Redis Alert" "${ALERT_EMAIL:-admin@company.com}" || true
  fi
  log "Alert sent: ${alert_text}"
}
# Main monitoring loop: poll the master and every slave forever.
# Globals: SLAVES, CHECK_INTERVAL (read)
main() {
  log "开始Redis主从复制监控"
  while true; do
    # FIX: '|| true' keeps one failed check from killing the monitor under
    # 'set -e' — the check functions already log and alert on failure, and
    # a monitor that dies on the first incident defeats its purpose.
    check_master_status || true
    for slave in "${SLAVES[@]}"; do
      check_slave_status "${slave}" || true
    done
    log "监控周期完成,等待 ${CHECK_INTERVAL} 秒"
    sleep "${CHECK_INTERVAL}"
  done
}
# Signal handling: log and exit cleanly on SIGTERM/SIGINT.
cleanup() {
log "监控程序退出"
exit 0
}
trap cleanup SIGTERM SIGINT
# Script entry point: start the monitoring loop.
main "$@"
Redis Sentinel哨兵模式
Sentinel配置
# sentinel.conf
# Basic settings
port 26379
daemonize yes
pidfile /var/run/redis/redis-sentinel.pid
logfile /var/log/redis/redis-sentinel.log
dir /var/lib/redis/sentinel
# Monitoring: watch "mymaster"; quorum of 2 Sentinels to declare it down
sentinel monitor mymaster 192.168.1.10 6379 2
sentinel auth-pass mymaster your_password
sentinel down-after-milliseconds mymaster 30000
sentinel parallel-syncs mymaster 1
sentinel failover-timeout mymaster 180000
# Hook scripts fired on events and on failover reconfiguration
sentinel notification-script mymaster /opt/redis/scripts/notify.sh
sentinel client-reconfig-script mymaster /opt/redis/scripts/reconfig.sh
# Security: forbid changing the script paths via SENTINEL SET at runtime
sentinel deny-scripts-reconfig yes
Sentinel部署脚本
#!/bin/bash
# scripts/deploy_sentinel.sh
# Deploys a Redis Sentinel cluster to the nodes below via ssh/scp and
# verifies that every Sentinel comes up monitoring the configured master.
# Assumes passwordless root ssh access to all SENTINEL_NODES.
set -euo pipefail
# Configuration parameters
SENTINEL_NODES=("192.168.1.20" "192.168.1.21" "192.168.1.22")
SENTINEL_PORT="26379"
MASTER_NAME="mymaster"
MASTER_HOST="192.168.1.10"
MASTER_PORT="6379"
MASTER_AUTH="your_password"
QUORUM="2"
# Render a per-node Sentinel configuration file into /tmp and print its
# path on stdout.
# Globals:   SENTINEL_PORT, MASTER_NAME, MASTER_HOST, MASTER_PORT,
#            MASTER_AUTH, QUORUM (read)
# Arguments: $1 - target node IP (used only to name the temp file)
create_sentinel_config() {
  local target_ip="$1"
  local out_file="/tmp/sentinel-${target_ip}.conf"
  cat > "${out_file}" << EOF
# Redis Sentinel配置文件
port ${SENTINEL_PORT}
daemonize yes
pidfile /var/run/redis/redis-sentinel.pid
logfile /var/log/redis/redis-sentinel.log
dir /var/lib/redis/sentinel
# 监控主节点
sentinel monitor ${MASTER_NAME} ${MASTER_HOST} ${MASTER_PORT} ${QUORUM}
sentinel auth-pass ${MASTER_NAME} ${MASTER_AUTH}
# 故障检测配置
sentinel down-after-milliseconds ${MASTER_NAME} 30000
sentinel parallel-syncs ${MASTER_NAME} 1
sentinel failover-timeout ${MASTER_NAME} 180000
# 通知脚本
sentinel notification-script ${MASTER_NAME} /opt/redis/scripts/notify.sh
sentinel client-reconfig-script ${MASTER_NAME} /opt/redis/scripts/reconfig.sh
# 安全配置
sentinel deny-scripts-reconfig yes
# 日志配置
loglevel notice
EOF
  printf '%s\n' "${out_file}"
}
# Materialize the Sentinel notification hook under /tmp, mark it
# executable, and print its path on stdout.  The quoted heredoc keeps the
# embedded script literal (no expansion at render time).
create_notification_script() {
  local out_file="/tmp/notify.sh"
  cat > "${out_file}" << 'EOF'
#!/bin/bash
# Redis Sentinel通知脚本
EVENT_TYPE="$1"
EVENT_NAME="$2"
shift 2
EVENT_DETAILS="$@"
TIMESTAMP=$(date '+%Y-%m-%d %H:%M:%S')
MESSAGE="Redis Sentinel Event: ${EVENT_TYPE} ${EVENT_NAME} - ${EVENT_DETAILS}"
# 记录日志
echo "[${TIMESTAMP}] ${MESSAGE}" >> /var/log/redis/sentinel-events.log
# 发送告警
if [[ -n "${SLACK_WEBHOOK_URL:-}" ]]; then
curl -X POST "${SLACK_WEBHOOK_URL}" \
-H 'Content-type: application/json' \
-d "{\"text\":\"🔔 ${MESSAGE}\"}" || true
fi
exit 0
EOF
  chmod +x "${out_file}"
  printf '%s\n' "${out_file}"
}
# Materialize the Sentinel client-reconfig hook under /tmp, mark it
# executable, and print its path on stdout.  The quoted heredoc keeps the
# embedded script literal (no expansion at render time).
create_reconfig_script() {
  local out_file="/tmp/reconfig.sh"
  cat > "${out_file}" << 'EOF'
#!/bin/bash
# Redis Sentinel重配置脚本
MASTER_NAME="$1"
ROLE="$2"
STATE="$3"
FROM_IP="$4"
FROM_PORT="$5"
TO_IP="$6"
TO_PORT="$7"
TIMESTAMP=$(date '+%Y-%m-%d %H:%M:%S')
MESSAGE="Redis failover: ${MASTER_NAME} ${ROLE} ${STATE} from ${FROM_IP}:${FROM_PORT} to ${TO_IP}:${TO_PORT}"
# 记录日志
echo "[${TIMESTAMP}] ${MESSAGE}" >> /var/log/redis/sentinel-reconfig.log
# 更新应用配置
# 这里可以添加更新应用配置的逻辑
# 例如:更新负载均衡器配置、通知应用服务等
exit 0
EOF
  chmod +x "${out_file}"
  printf '%s\n' "${out_file}"
}
# Deploy Sentinel (config + hook scripts) to one node over ssh/scp, start
# it, and verify the process is running.  Exits the whole script on failure.
# Arguments: $1 - target node IP; assumes passwordless root ssh access
deploy_to_node() {
  local node_ip="$1"
  echo "部署Sentinel到节点: ${node_ip}"
  # Render the per-node configuration and helper scripts locally.
  local config_file
  config_file=$(create_sentinel_config "${node_ip}")
  local notify_script
  local reconfig_script
  notify_script=$(create_notification_script)
  reconfig_script=$(create_reconfig_script)
  # FIX: create the target directories BEFORE copying files into them.
  # The original ran scp first, which fails on a fresh host where
  # /etc/redis or /opt/redis/scripts does not exist yet.
  ssh "root@${node_ip}" "mkdir -p /etc/redis /var/run/redis /var/log/redis /var/lib/redis/sentinel /opt/redis/scripts"
  scp "${config_file}" "root@${node_ip}:/etc/redis/sentinel.conf"
  scp "${notify_script}" "root@${node_ip}:/opt/redis/scripts/notify.sh"
  scp "${reconfig_script}" "root@${node_ip}:/opt/redis/scripts/reconfig.sh"
  # Make the hook scripts executable on the target.
  ssh "root@${node_ip}" "chmod +x /opt/redis/scripts/*.sh"
  # Launch Sentinel with the pushed configuration.
  ssh "root@${node_ip}" "redis-sentinel /etc/redis/sentinel.conf"
  # Verify the process actually started.
  sleep 3
  if ssh "root@${node_ip}" "pgrep -f redis-sentinel > /dev/null"; then
    echo "Sentinel在节点 ${node_ip} 启动成功"
  else
    echo "Sentinel在节点 ${node_ip} 启动失败"
    exit 1
  fi
  # Clean up the locally rendered temp files.
  rm -f "${config_file}" "${notify_script}" "${reconfig_script}"
}
# Print the 'sentinel masters' view from every Sentinel node (truncated to
# 20 lines each) for a quick health check.
# Globals: SENTINEL_NODES, SENTINEL_PORT (read)
verify_sentinel_cluster() {
  echo "验证Sentinel集群状态..."
  local node_ip
  for node_ip in "${SENTINEL_NODES[@]}"; do
    echo "检查节点 ${node_ip}:"
    ssh "root@${node_ip}" "redis-cli -p ${SENTINEL_PORT} sentinel masters" | head -20
    echo "---"
  done
}
# Orchestrate the full Sentinel rollout: deploy each node, wait for the
# cluster to settle, then verify it.
main() {
  echo "开始部署Redis Sentinel集群..."
  local node_ip
  for node_ip in "${SENTINEL_NODES[@]}"; do
    deploy_to_node "${node_ip}"
  done
  # Give the Sentinels time to discover each other before verifying.
  echo "等待Sentinel集群稳定..."
  sleep 10
  verify_sentinel_cluster
  echo "Redis Sentinel集群部署完成!"
}
# Script entry point.
main "$@"
Sentinel客户端连接示例
// Java客户端连接Sentinel
import redis.clients.jedis.Jedis;
import redis.clients.jedis.JedisSentinelPool;
import java.util.HashSet;
import java.util.Set;
// Jedis-based client that discovers the current Redis master through
// Sentinel; the pool transparently follows failovers.
public class RedisSentinelClient {
    // Pool that always hands out connections to the current master.
    private JedisSentinelPool sentinelPool;

    public RedisSentinelClient() {
        // Sentinel endpoints used for master discovery.
        Set<String> sentinelAddresses = new HashSet<>();
        sentinelAddresses.add("192.168.1.20:26379");
        sentinelAddresses.add("192.168.1.21:26379");
        sentinelAddresses.add("192.168.1.22:26379");
        // Build the pool against the named master group.
        this.sentinelPool = new JedisSentinelPool(
                "mymaster",        // master group name
                sentinelAddresses, // Sentinel endpoints
                "your_password"    // Redis password
        );
    }

    public String get(String key) {
        try (Jedis connection = sentinelPool.getResource()) {
            return connection.get(key);
        }
    }

    public void set(String key, String value) {
        try (Jedis connection = sentinelPool.getResource()) {
            connection.set(key, value);
        }
    }

    public void close() {
        if (sentinelPool != null) {
            sentinelPool.close();
        }
    }
}
Redis Cluster集群模式
Cluster节点配置
# redis-cluster.conf
# Per-node configuration for a Redis Cluster member (data port 7000).
# Basic settings
bind 0.0.0.0
port 7000
daemonize yes
pidfile /var/run/redis/redis-cluster-7000.pid
logfile /var/log/redis/redis-cluster-7000.log
dir /var/lib/redis/cluster
# Cluster settings (bus port = data port + 10000)
cluster-enabled yes
cluster-config-file nodes-7000.conf
cluster-node-timeout 15000
cluster-announce-ip 192.168.1.10
cluster-announce-port 7000
cluster-announce-bus-port 17000
# Failover settings
cluster-require-full-coverage no
cluster-slave-validity-factor 10
cluster-migration-barrier 1
# Memory settings
maxmemory 2gb
maxmemory-policy allkeys-lru
# Persistence settings
save 900 1
save 300 10
save 60 10000
appendonly yes
appendfilename "appendonly-7000.aof"
appendfsync everysec
# Security (NOTE: replace the placeholder password before deploying)
requirepass your_password
masterauth your_password
# Client settings
timeout 300
tcp-keepalive 300
maxclients 10000
# Slow log settings
slowlog-log-slower-than 10000
slowlog-max-len 128
Cluster集群部署脚本
#!/bin/bash
# scripts/deploy_redis_cluster.sh
# Deploys a 6-node Redis Cluster over ssh/scp (with REPLICAS=1 this yields
# 3 masters + 3 replicas), creates the cluster, verifies it, and runs a
# read/write smoke test.  Assumes passwordless root ssh to all nodes.
set -euo pipefail
# Configuration parameters
CLUSTER_NODES=(
"192.168.1.10:7000"
"192.168.1.11:7000"
"192.168.1.12:7000"
"192.168.1.13:7000"
"192.168.1.14:7000"
"192.168.1.15:7000"
)
REDIS_PASSWORD="your_password"
# Replicas per master, passed to 'redis-cli --cluster create'.
REPLICAS=1
# Render the configuration file for one cluster node into /tmp and print
# its path on stdout.  The cluster bus port is the data port + 10000.
# Globals:   REDIS_PASSWORD (read)
# Arguments: $1 - node IP, $2 - node port
create_redis_config() {
  local host="$1"
  local port="$2"
  local out_file="/tmp/redis-cluster-${host}-${port}.conf"
  cat > "${out_file}" << EOF
# Redis Cluster配置文件
bind 0.0.0.0
port ${port}
daemonize yes
pidfile /var/run/redis/redis-cluster-${port}.pid
logfile /var/log/redis/redis-cluster-${port}.log
dir /var/lib/redis/cluster
# 集群配置
cluster-enabled yes
cluster-config-file nodes-${port}.conf
cluster-node-timeout 15000
cluster-announce-ip ${host}
cluster-announce-port ${port}
cluster-announce-bus-port $((port + 10000))
# 故障转移配置
cluster-require-full-coverage no
cluster-slave-validity-factor 10
cluster-migration-barrier 1
# 内存配置
maxmemory 2gb
maxmemory-policy allkeys-lru
# 持久化配置
save 900 1
save 300 10
save 60 10000
appendonly yes
appendfilename "appendonly-${port}.aof"
appendfsync everysec
# 安全配置
requirepass ${REDIS_PASSWORD}
masterauth ${REDIS_PASSWORD}
# 客户端配置
timeout 300
tcp-keepalive 300
maxclients 10000
# 慢查询配置
slowlog-log-slower-than 10000
slowlog-max-len 128
# 内存优化
hash-max-ziplist-entries 512
hash-max-ziplist-value 64
list-max-ziplist-size -2
set-max-intset-entries 512
zset-max-ziplist-entries 128
zset-max-ziplist-value 64
EOF
  printf '%s\n' "${out_file}"
}
# Deploy one Redis Cluster node: render its config, push it, start the
# server, and verify it answers PING.  Exits the whole script on failure.
# Globals:   REDIS_PASSWORD (read)
# Arguments: $1 - node address as "ip:port"; assumes passwordless root ssh
deploy_redis_node() {
  local node_addr="$1"
  local node_ip
  local node_port
  IFS=':' read -r node_ip node_port <<< "${node_addr}"
  echo "部署Redis节点: ${node_ip}:${node_port}"
  local config_file
  config_file=$(create_redis_config "${node_ip}" "${node_port}")
  # FIX: create the target directories (including /etc/redis, which the
  # original never created) BEFORE copying the config; the original ran
  # scp first, which fails on a fresh host.
  ssh "root@${node_ip}" "mkdir -p /etc/redis /var/run/redis /var/log/redis /var/lib/redis/cluster"
  scp "${config_file}" "root@${node_ip}:/etc/redis/redis-cluster-${node_port}.conf"
  # Start the node.
  ssh "root@${node_ip}" "redis-server /etc/redis/redis-cluster-${node_port}.conf"
  # Verify it responds to PING.
  sleep 3
  if ssh "root@${node_ip}" "redis-cli -h ${node_ip} -p ${node_port} -a ${REDIS_PASSWORD} ping" | grep -q "PONG"; then
    echo "Redis节点 ${node_ip}:${node_port} 启动成功"
  else
    echo "Redis节点 ${node_ip}:${node_port} 启动失败"
    exit 1
  fi
  # Clean up the locally rendered temp file.
  rm -f "${config_file}"
}
# Build the cluster from all CLUSTER_NODES via 'redis-cli --cluster create',
# assigning REPLICAS replicas per master.  Aborts the script on failure.
# Globals: CLUSTER_NODES, REPLICAS, REDIS_PASSWORD (read)
create_cluster() {
  echo "创建Redis集群..."
  # FIX: the original checked "$?" after the pipeline, which is dead code
  # under 'set -e' (a failure would already have exited the script, so the
  # error branch was unreachable).  Testing the command directly in 'if'
  # restores the intended error path.  Passing the array directly also
  # avoids the unquoted word-split of a hand-built string.
  if echo "yes" | redis-cli --cluster create "${CLUSTER_NODES[@]}" \
    --cluster-replicas "${REPLICAS}" \
    -a "${REDIS_PASSWORD}"; then
    echo "Redis集群创建成功"
  else
    echo "Redis集群创建失败"
    exit 1
  fi
}
# Dump node list, cluster info, and slot layout from the first cluster
# node for manual inspection.
# Globals: CLUSTER_NODES, REDIS_PASSWORD (read)
verify_cluster() {
  echo "验证集群状态..."
  local node_ip node_port
  IFS=':' read -r node_ip node_port <<< "${CLUSTER_NODES[0]}"
  # Shared redis-cli invocation prefix for the three queries below.
  local -a cli=(redis-cli -h "${node_ip}" -p "${node_port}" -a "${REDIS_PASSWORD}")
  echo "集群节点信息:"
  "${cli[@]}" cluster nodes
  echo ""
  echo "集群状态信息:"
  "${cli[@]}" cluster info
  echo ""
  echo "集群槽位分配:"
  "${cli[@]}" cluster slots
}
# Smoke-test the cluster: write and read back 10 keys through the first
# node in cluster mode (-c follows MOVED redirects).
# Globals: CLUSTER_NODES, REDIS_PASSWORD (read)
test_cluster() {
  echo "测试集群功能..."
  local node_ip node_port
  IFS=':' read -r node_ip node_port <<< "${CLUSTER_NODES[0]}"
  local i key value result
  for ((i = 1; i <= 10; i++)); do
    key="test_key_${i}"
    value="test_value_${i}"
    # Write, then read back and compare.
    redis-cli -h "${node_ip}" -p "${node_port}" -a "${REDIS_PASSWORD}" -c set "${key}" "${value}"
    result=$(redis-cli -h "${node_ip}" -p "${node_port}" -a "${REDIS_PASSWORD}" -c get "${key}")
    if [[ "${result}" == "${value}" ]]; then
      echo "测试 ${key} 成功"
    else
      echo "测试 ${key} 失败,期望: ${value},实际: ${result}"
    fi
  done
}
# Full deployment workflow: node rollout -> cluster create -> verify ->
# smoke test.
main() {
  echo "开始部署Redis Cluster集群..."
  local node
  for node in "${CLUSTER_NODES[@]}"; do
    deploy_redis_node "${node}"
  done
  # Give every node time to come up before forming the cluster.
  echo "等待所有节点启动完成..."
  sleep 10
  create_cluster
  # Let slot assignment and gossip settle before inspecting.
  echo "等待集群稳定..."
  sleep 5
  verify_cluster
  test_cluster
  echo "Redis Cluster集群部署完成!"
}
# Script entry point.
main "$@"
Cluster客户端连接示例
// Java客户端连接Redis Cluster
import redis.clients.jedis.HostAndPort;
import redis.clients.jedis.JedisCluster;
import redis.clients.jedis.JedisPoolConfig;
import java.util.HashSet;
import java.util.Set;
// Client for Redis Cluster; JedisCluster follows MOVED/ASK redirects
// internally, so callers just use plain get/set.
public class RedisClusterClient {
    private JedisCluster cluster;

    public RedisClusterClient() {
        // Seed nodes; the client discovers the full topology from any one
        // of them.  These are the six nodes 192.168.1.10..15, all on 7000.
        Set<HostAndPort> seedNodes = new HashSet<>();
        for (int lastOctet = 10; lastOctet <= 15; lastOctet++) {
            seedNodes.add(new HostAndPort("192.168.1." + lastOctet, 7000));
        }
        // Connection-pool tuning.
        JedisPoolConfig cfg = new JedisPoolConfig();
        cfg.setMaxTotal(100);
        cfg.setMaxIdle(20);
        cfg.setMinIdle(5);
        cfg.setTestOnBorrow(true);
        cfg.setTestOnReturn(true);
        cfg.setTestWhileIdle(true);
        this.cluster = new JedisCluster(
                seedNodes,       // seed nodes
                2000,            // connect timeout (ms)
                2000,            // socket timeout (ms)
                5,               // max redirections
                "your_password", // password
                cfg              // pool configuration
        );
    }

    public String get(String key) {
        return cluster.get(key);
    }

    public void set(String key, String value) {
        cluster.set(key, value);
    }

    public void close() {
        if (cluster != null) {
            cluster.close();
        }
    }
}
Redis分片策略与一致性哈希
一致性哈希实现
/**
 * Consistent-hash sharder that maps keys onto a ring of Redis nodes.
 *
 * Each physical node is projected onto the ring as {@code virtualNodes}
 * virtual points, so adding or removing a node only remaps a small
 * fraction of the key space.
 */
public class RedisConsistentHashSharding {
    // Number of virtual points per physical node on the ring.
    private final int virtualNodes;
    // The ring: hash position -> owning physical node.
    private final SortedMap<Long, RedisNode> hashRing;
    // Current physical-node membership; kept in sync by add/removeNode.
    private final List<RedisNode> physicalNodes;

    public RedisConsistentHashSharding(List<RedisNode> nodes, int virtualNodes) {
        this.virtualNodes = virtualNodes;
        this.hashRing = new TreeMap<>();
        this.physicalNodes = new ArrayList<>(nodes);
        initHashRing();
    }

    /** Place every known physical node onto the ring. */
    private void initHashRing() {
        for (RedisNode node : physicalNodes) {
            addNode(node);
        }
    }

    /**
     * Add a node to the ring and to the membership list.
     *
     * FIX: the original only touched the ring, so getAllNodes() (and any
     * fallback logic built on it) saw stale membership after a topology
     * change.  The contains() guard keeps initHashRing() from duplicating
     * the initial nodes.
     */
    public void addNode(RedisNode node) {
        if (!physicalNodes.contains(node)) {
            physicalNodes.add(node);
        }
        for (int i = 0; i < virtualNodes; i++) {
            String virtualNodeKey = node.getHost() + ":" + node.getPort() + "-" + i;
            long hash = hash(virtualNodeKey);
            hashRing.put(hash, node);
        }
    }

    /**
     * Remove a node from the ring and from the membership list.
     * (FIX: the original left the node in physicalNodes.)
     */
    public void removeNode(RedisNode node) {
        physicalNodes.remove(node);
        for (int i = 0; i < virtualNodes; i++) {
            String virtualNodeKey = node.getHost() + ":" + node.getPort() + "-" + i;
            long hash = hash(virtualNodeKey);
            hashRing.remove(hash);
        }
    }

    /**
     * Resolve the node owning {@code key}: the first virtual point at or
     * after the key's hash, wrapping around to the start of the ring when
     * none follows.  Returns null when the ring is empty.
     */
    public RedisNode getNode(String key) {
        if (hashRing.isEmpty()) {
            return null;
        }
        long hash = hash(key);
        SortedMap<Long, RedisNode> tailMap = hashRing.tailMap(hash);
        Long nodeHash = tailMap.isEmpty() ? hashRing.firstKey() : tailMap.firstKey();
        return hashRing.get(nodeHash);
    }

    /**
     * Hash a string to a ring position: the first 8 bytes of its MD5
     * digest folded big-endian into a long.  The value may be negative,
     * which is fine because every position is computed the same way.
     */
    private long hash(String key) {
        try {
            MessageDigest md = MessageDigest.getInstance("MD5");
            byte[] digest = md.digest(key.getBytes(StandardCharsets.UTF_8));
            long hash = 0;
            for (int i = 0; i < 8; i++) {
                hash = (hash << 8) | (digest[i] & 0xFF);
            }
            return hash;
        } catch (NoSuchAlgorithmException e) {
            // Every JDK is required to provide MD5; effectively unreachable.
            throw new RuntimeException("MD5 algorithm not found", e);
        }
    }

    /** Snapshot of the current physical nodes. */
    public List<RedisNode> getAllNodes() {
        return new ArrayList<>(physicalNodes);
    }

    /** Snapshot of the ring (hash position -> node). */
    public Map<Long, RedisNode> getHashRingStatus() {
        return new HashMap<>(hashRing);
    }

    /**
     * A physical Redis node plus its Jedis connection.
     * NOTE(review): the constructor opens a connection eagerly, so even
     * short-lived instances create sockets; equality is host+port only.
     */
    public static class RedisNode {
        private String host;
        private int port;
        private String password;
        private Jedis jedis;

        public RedisNode(String host, int port, String password) {
            this.host = host;
            this.port = port;
            this.password = password;
            this.jedis = new Jedis(host, port);
            if (password != null && !password.isEmpty()) {
                this.jedis.auth(password);
            }
        }

        public String get(String key) {
            return jedis.get(key);
        }

        public void set(String key, String value) {
            jedis.set(key, value);
        }

        public void close() {
            if (jedis != null) {
                jedis.close();
            }
        }

        // Getters and setters
        public String getHost() { return host; }
        public int getPort() { return port; }
        public String getPassword() { return password; }

        @Override
        public boolean equals(Object o) {
            if (this == o) return true;
            if (o == null || getClass() != o.getClass()) return false;
            RedisNode redisNode = (RedisNode) o;
            return port == redisNode.port && Objects.equals(host, redisNode.host);
        }

        @Override
        public int hashCode() {
            return Objects.hash(host, port);
        }

        @Override
        public String toString() {
            return host + ":" + port;
        }
    }
}
分片客户端实现
/**
 * Sharded Redis client built on consistent hashing, with parallel batch
 * operations and a best-effort fallback to other nodes on failure.
 *
 * NOTE(review): the fallback in get/set targets an arbitrary OTHER shard,
 * not a replica of the failed one — reads served that way may be missing
 * or stale, and writes land on the "wrong" shard until the node recovers.
 * Confirm this degraded-mode behavior is intended.
 */
public class RedisShardingClient {
// Key -> node mapping via consistent hashing (160 virtual nodes each).
private final RedisConsistentHashSharding sharding;
// Worker pool for parallel mget/mset fan-out.
private final ExecutorService executorService;
public RedisShardingClient(List<RedisConsistentHashSharding.RedisNode> nodes) {
this.sharding = new RedisConsistentHashSharding(nodes, 160);
this.executorService = Executors.newFixedThreadPool(10);
}
/**
 * Get a value from the shard owning the key; on any exception, fall back
 * to scanning the remaining nodes (see class-level note).
 */
public String get(String key) {
RedisConsistentHashSharding.RedisNode node = sharding.getNode(key);
if (node == null) {
throw new RuntimeException("No available Redis node");
}
try {
return node.get(key);
} catch (Exception e) {
// Node unreachable: try the other nodes as a last resort.
return getFromBackupNode(key, node);
}
}
/**
 * Set a value on the shard owning the key; on any exception, fall back
 * to writing to another node (see class-level note).
 */
public void set(String key, String value) {
RedisConsistentHashSharding.RedisNode node = sharding.getNode(key);
if (node == null) {
throw new RuntimeException("No available Redis node");
}
try {
node.set(key, value);
} catch (Exception e) {
// Node unreachable: write to another node as a last resort.
setToBackupNode(key, value, node);
}
}
/**
 * Batch get: groups keys by owning shard, fetches each group on the
 * worker pool in parallel (5s timeout per group), and merges the results.
 * Missing keys and per-key/per-group failures are silently dropped from
 * the returned map.
 */
public Map<String, String> mget(List<String> keys) {
// Group the requested keys by the node that owns them.
Map<RedisConsistentHashSharding.RedisNode, List<String>> nodeKeysMap = new HashMap<>();
for (String key : keys) {
RedisConsistentHashSharding.RedisNode node = sharding.getNode(key);
nodeKeysMap.computeIfAbsent(node, k -> new ArrayList<>()).add(key);
}
// Fetch each group concurrently.
List<Future<Map<String, String>>> futures = new ArrayList<>();
for (Map.Entry<RedisConsistentHashSharding.RedisNode, List<String>> entry : nodeKeysMap.entrySet()) {
RedisConsistentHashSharding.RedisNode node = entry.getKey();
List<String> nodeKeys = entry.getValue();
Future<Map<String, String>> future = executorService.submit(() -> {
Map<String, String> result = new HashMap<>();
for (String key : nodeKeys) {
try {
String value = node.get(key);
if (value != null) {
result.put(key, value);
}
} catch (Exception e) {
// Log and continue; one bad key must not sink the whole group.
System.err.println("Failed to get key " + key + " from node " + node + ": " + e.getMessage());
}
}
return result;
});
futures.add(future);
}
// Merge the per-group results; a group that times out is skipped.
Map<String, String> result = new HashMap<>();
for (Future<Map<String, String>> future : futures) {
try {
result.putAll(future.get(5, TimeUnit.SECONDS));
} catch (Exception e) {
System.err.println("Failed to get batch result: " + e.getMessage());
}
}
return result;
}
/**
 * Batch set: groups entries by owning shard and writes each group on the
 * worker pool in parallel (5s timeout per group).  Failures are logged
 * and swallowed, so delivery is best-effort.
 */
public void mset(Map<String, String> keyValues) {
// Group the entries by the node that owns each key.
Map<RedisConsistentHashSharding.RedisNode, Map<String, String>> nodeDataMap = new HashMap<>();
for (Map.Entry<String, String> entry : keyValues.entrySet()) {
String key = entry.getKey();
String value = entry.getValue();
RedisConsistentHashSharding.RedisNode node = sharding.getNode(key);
nodeDataMap.computeIfAbsent(node, k -> new HashMap<>()).put(key, value);
}
// Write each group concurrently.
List<Future<Void>> futures = new ArrayList<>();
for (Map.Entry<RedisConsistentHashSharding.RedisNode, Map<String, String>> entry : nodeDataMap.entrySet()) {
RedisConsistentHashSharding.RedisNode node = entry.getKey();
Map<String, String> nodeData = entry.getValue();
Future<Void> future = executorService.submit(() -> {
for (Map.Entry<String, String> dataEntry : nodeData.entrySet()) {
try {
node.set(dataEntry.getKey(), dataEntry.getValue());
} catch (Exception e) {
System.err.println("Failed to set key " + dataEntry.getKey() + " to node " + node + ": " + e.getMessage());
}
}
return null;
});
futures.add(future);
}
// Barrier: wait (up to 5s each) for every group to finish.
for (Future<Void> future : futures) {
try {
future.get(5, TimeUnit.SECONDS);
} catch (Exception e) {
System.err.println("Failed to complete batch set: " + e.getMessage());
}
}
}
/**
 * Try every node other than the failed one until a read succeeds.
 * See the class-level note on the semantics of cross-shard reads.
 */
private String getFromBackupNode(String key, RedisConsistentHashSharding.RedisNode failedNode) {
List<RedisConsistentHashSharding.RedisNode> allNodes = sharding.getAllNodes();
for (RedisConsistentHashSharding.RedisNode node : allNodes) {
if (!node.equals(failedNode)) {
try {
return node.get(key);
} catch (Exception e) {
// Try the next node.
}
}
}
throw new RuntimeException("All Redis nodes are unavailable");
}
/**
 * Try every node other than the failed one until a write succeeds.
 * See the class-level note on the semantics of cross-shard writes.
 */
private void setToBackupNode(String key, String value, RedisConsistentHashSharding.RedisNode failedNode) {
List<RedisConsistentHashSharding.RedisNode> allNodes = sharding.getAllNodes();
for (RedisConsistentHashSharding.RedisNode node : allNodes) {
if (!node.equals(failedNode)) {
try {
node.set(key, value);
return;
} catch (Exception e) {
// Try the next node.
}
}
}
throw new RuntimeException("All Redis nodes are unavailable");
}
/**
 * Register a new node with the hash ring.
 */
public void addNode(String host, int port, String password) {
RedisConsistentHashSharding.RedisNode newNode =
new RedisConsistentHashSharding.RedisNode(host, port, password);
sharding.addNode(newNode);
}
/**
 * Remove a node from the hash ring.
 * NOTE(review): this constructs a throwaway RedisNode, whose constructor
 * opens a real Jedis connection (with a null password) purely to compute
 * equality, and that connection is never closed — verify and fix upstream.
 */
public void removeNode(String host, int port) {
RedisConsistentHashSharding.RedisNode nodeToRemove =
new RedisConsistentHashSharding.RedisNode(host, port, null);
sharding.removeNode(nodeToRemove);
}
/**
 * Shut down the worker pool (5s grace, then forced) and close every
 * node's Redis connection.
 */
public void close() {
executorService.shutdown();
try {
if (!executorService.awaitTermination(5, TimeUnit.SECONDS)) {
executorService.shutdownNow();
}
} catch (InterruptedException e) {
executorService.shutdownNow();
}
// Close all Redis connections.
for (RedisConsistentHashSharding.RedisNode node : sharding.getAllNodes()) {
node.close();
}
}
}
Redis集群监控与运维
集群监控脚本
#!/bin/bash
# scripts/redis_cluster_monitor.sh
# Periodic health monitor for a Redis Cluster: checks node connectivity,
# slot state, memory/connection usage and slow log, and raises alerts.
set -euo pipefail
# Cluster node list, one "host:port" per entry.
CLUSTER_NODES=(
    "192.168.1.10:7000"
    "192.168.1.11:7000"
    "192.168.1.12:7000"
    "192.168.1.13:7000"
    "192.168.1.14:7000"
    "192.168.1.15:7000"
)
# All tunables are overridable from the environment; in particular the
# password no longer has to be edited into the script (it was hardcoded).
REDIS_PASSWORD="${REDIS_PASSWORD:-your_password}"
CHECK_INTERVAL="${CHECK_INTERVAL:-60}"
ALERT_THRESHOLD_MEMORY="${ALERT_THRESHOLD_MEMORY:-80}"
ALERT_THRESHOLD_CPU="${ALERT_THRESHOLD_CPU:-80}"
ALERT_THRESHOLD_CONNECTIONS="${ALERT_THRESHOLD_CONNECTIONS:-80}"
# Timestamped logger. Writes to stderr so stdout stays clean for reports.
log() {
    printf '[%s] %s\n' "$(date '+%Y-%m-%d %H:%M:%S')" "$1" >&2
}
# Return 0 if the node answers PING, non-zero otherwise.
# $1 - node address as "host:port"
check_node_connection() {
    local addr="$1"
    local host port
    IFS=':' read -r host port <<< "${addr}"
    # The function's status is redis-cli's status; no if/else needed.
    redis-cli -h "${host}" -p "${port}" -a "${REDIS_PASSWORD}" ping > /dev/null 2>&1
}
# Fetch and print basic stats (version, memory, clients, QPS) for one node.
# $1 - node address as "host:port". Returns 1 if the node is unreachable.
get_node_info() {
    local node_addr="$1"
    local node_ip
    local node_port
    IFS=':' read -r node_ip node_port <<< "${node_addr}"
    local info
    info=$(redis-cli -h "${node_ip}" -p "${node_port}" -a "${REDIS_PASSWORD}" info server,memory,clients,stats 2>/dev/null || echo "")
    if [[ -z "${info}" ]]; then
        echo "ERROR: 无法获取节点信息"
        return 1
    fi
    # One anchored awk per field. The previous grep|cut|tr pipelines spawned
    # three processes per field, matched unanchored, and a no-match grep
    # would abort the whole script under `set -o pipefail`.
    local redis_version used_memory used_memory_human
    local used_memory_peak used_memory_peak_human
    local connected_clients total_commands_processed instantaneous_ops_per_sec
    redis_version=$(awk -F: '$1=="redis_version"{gsub(/\r/,"",$2);print $2;exit}' <<< "${info}")
    used_memory=$(awk -F: '$1=="used_memory"{gsub(/\r/,"",$2);print $2;exit}' <<< "${info}")
    used_memory_human=$(awk -F: '$1=="used_memory_human"{gsub(/\r/,"",$2);print $2;exit}' <<< "${info}")
    used_memory_peak=$(awk -F: '$1=="used_memory_peak"{gsub(/\r/,"",$2);print $2;exit}' <<< "${info}")
    used_memory_peak_human=$(awk -F: '$1=="used_memory_peak_human"{gsub(/\r/,"",$2);print $2;exit}' <<< "${info}")
    connected_clients=$(awk -F: '$1=="connected_clients"{gsub(/\r/,"",$2);print $2;exit}' <<< "${info}")
    total_commands_processed=$(awk -F: '$1=="total_commands_processed"{gsub(/\r/,"",$2);print $2;exit}' <<< "${info}")
    instantaneous_ops_per_sec=$(awk -F: '$1=="instantaneous_ops_per_sec"{gsub(/\r/,"",$2);print $2;exit}' <<< "${info}")
    # Human-readable summary on stdout so reports can capture it.
    cat << EOF
节点: ${node_addr}
Redis版本: ${redis_version}
内存使用: ${used_memory_human} (${used_memory} bytes)
内存峰值: ${used_memory_peak_human} (${used_memory_peak} bytes)
连接数: ${connected_clients}
总命令数: ${total_commands_processed}
当前QPS: ${instantaneous_ops_per_sec}
EOF
}
# Check overall cluster health via CLUSTER INFO; alerts on abnormal state.
check_cluster_status() {
    log "检查集群状态..."
    # Try every configured node until one answers. The previous version only
    # queried CLUSTER_NODES[0], so the check failed whenever that node was down.
    local cluster_info=""
    local node node_ip node_port
    for node in "${CLUSTER_NODES[@]}"; do
        IFS=':' read -r node_ip node_port <<< "${node}"
        cluster_info=$(redis-cli -h "${node_ip}" -p "${node_port}" -a "${REDIS_PASSWORD}" cluster info 2>/dev/null || echo "")
        if [[ -n "${cluster_info}" ]]; then
            break
        fi
    done
    if [[ -z "${cluster_info}" ]]; then
        log "ERROR: 无法获取集群信息"
        send_alert "Redis集群状态检查失败"
        return 1
    fi
    # Anchored awk extraction; numeric fields default to 0 so the -gt tests
    # below cannot abort on an empty string under `set -e`.
    local cluster_state cluster_slots_assigned cluster_slots_ok
    local cluster_slots_pfail cluster_slots_fail cluster_known_nodes cluster_size
    cluster_state=$(awk -F: '$1=="cluster_state"{gsub(/\r/,"",$2);print $2;exit}' <<< "${cluster_info}")
    cluster_slots_assigned=$(awk -F: '$1=="cluster_slots_assigned"{gsub(/\r/,"",$2);print $2;exit}' <<< "${cluster_info}")
    cluster_slots_ok=$(awk -F: '$1=="cluster_slots_ok"{gsub(/\r/,"",$2);print $2;exit}' <<< "${cluster_info}")
    cluster_slots_pfail=$(awk -F: '$1=="cluster_slots_pfail"{gsub(/\r/,"",$2);print $2;exit}' <<< "${cluster_info}")
    cluster_slots_fail=$(awk -F: '$1=="cluster_slots_fail"{gsub(/\r/,"",$2);print $2;exit}' <<< "${cluster_info}")
    cluster_known_nodes=$(awk -F: '$1=="cluster_known_nodes"{gsub(/\r/,"",$2);print $2;exit}' <<< "${cluster_info}")
    cluster_size=$(awk -F: '$1=="cluster_size"{gsub(/\r/,"",$2);print $2;exit}' <<< "${cluster_info}")
    cluster_slots_pfail=${cluster_slots_pfail:-0}
    cluster_slots_fail=${cluster_slots_fail:-0}
    log "集群状态: ${cluster_state}"
    log "已分配槽位: ${cluster_slots_assigned}/16384"
    log "正常槽位: ${cluster_slots_ok}"
    log "可能失败槽位: ${cluster_slots_pfail}"
    log "失败槽位: ${cluster_slots_fail}"
    log "已知节点数: ${cluster_known_nodes}"
    log "集群大小: ${cluster_size}"
    if [[ "${cluster_state}" != "ok" ]]; then
        log "WARNING: 集群状态异常: ${cluster_state}"
        send_alert "Redis集群状态异常: ${cluster_state}"
    fi
    if [[ "${cluster_slots_fail}" -gt 0 ]]; then
        log "ERROR: 存在失败的槽位: ${cluster_slots_fail}"
        send_alert "Redis集群存在失败的槽位: ${cluster_slots_fail}"
    fi
    if [[ "${cluster_slots_pfail}" -gt 0 ]]; then
        log "WARNING: 存在可能失败的槽位: ${cluster_slots_pfail}"
        send_alert "Redis集群存在可能失败的槽位: ${cluster_slots_pfail}"
    fi
}
# Ping every configured node; print details for healthy nodes and alert
# with a comma-joined list of any that are unreachable.
check_nodes_status() {
    log "检查所有节点状态..."
    local unreachable=()
    local node
    for node in "${CLUSTER_NODES[@]}"; do
        if ! check_node_connection "${node}"; then
            log "ERROR: 节点 ${node} 连接失败"
            unreachable+=("${node}")
            continue
        fi
        log "节点 ${node} 连接正常"
        get_node_info "${node}"
        echo "---"
    done
    if (( ${#unreachable[@]} > 0 )); then
        local joined
        joined=$(IFS=','; echo "${unreachable[*]}")
        send_alert "Redis节点连接失败: ${joined}"
    fi
}
# Per-node performance checks: memory utilisation, client connections and
# slow-log length, alerting when thresholds are exceeded.
check_performance_metrics() {
    log "检查性能指标..."
    local node node_ip node_port
    for node in "${CLUSTER_NODES[@]}"; do
        IFS=':' read -r node_ip node_port <<< "${node}"
        if ! check_node_connection "${node}"; then
            continue
        fi
        # --- memory utilisation ---
        # `|| true` guards: under `set -euo pipefail` a no-match grep or a
        # transient redis-cli failure would otherwise kill the whole monitor.
        local used_memory maxmemory
        used_memory=$(redis-cli -h "${node_ip}" -p "${node_port}" -a "${REDIS_PASSWORD}" info memory 2>/dev/null | grep "used_memory:" | cut -d: -f2 | tr -d '\r' || true)
        maxmemory=$(redis-cli -h "${node_ip}" -p "${node_port}" -a "${REDIS_PASSWORD}" config get maxmemory 2>/dev/null | tail -1 || true)
        # Also require non-empty maxmemory: arithmetic on "" aborts under set -e.
        if [[ -n "${used_memory}" && -n "${maxmemory}" && "${maxmemory}" != "0" ]]; then
            local memory_usage_percent
            memory_usage_percent=$((used_memory * 100 / maxmemory))
            if [[ "${memory_usage_percent}" -gt "${ALERT_THRESHOLD_MEMORY}" ]]; then
                log "WARNING: 节点 ${node} 内存使用率过高: ${memory_usage_percent}%"
                send_alert "Redis节点内存使用率过高: ${node} ${memory_usage_percent}%"
            fi
        fi
        # --- client connections ---
        local connected_clients maxclients
        connected_clients=$(redis-cli -h "${node_ip}" -p "${node_port}" -a "${REDIS_PASSWORD}" info clients 2>/dev/null | grep "connected_clients:" | cut -d: -f2 | tr -d '\r' || true)
        maxclients=$(redis-cli -h "${node_ip}" -p "${node_port}" -a "${REDIS_PASSWORD}" config get maxclients 2>/dev/null | tail -1 || true)
        if [[ -n "${connected_clients}" && -n "${maxclients}" && "${maxclients}" != "0" ]]; then
            local connections_usage_percent
            connections_usage_percent=$((connected_clients * 100 / maxclients))
            if [[ "${connections_usage_percent}" -gt "${ALERT_THRESHOLD_CONNECTIONS}" ]]; then
                log "WARNING: 节点 ${node} 连接数使用率过高: ${connections_usage_percent}%"
                send_alert "Redis节点连接数使用率过高: ${node} ${connections_usage_percent}%"
            fi
        fi
        # --- slow log ---
        local slowlog_len
        slowlog_len=$(redis-cli -h "${node_ip}" -p "${node_port}" -a "${REDIS_PASSWORD}" slowlog len 2>/dev/null || echo 0)
        # Only compare when the reply is a plain integer.
        if [[ "${slowlog_len}" =~ ^[0-9]+$ ]] && [[ "${slowlog_len}" -gt 10 ]]; then
            log "WARNING: 节点 ${node} 慢查询数量过多: ${slowlog_len}"
            send_alert "Redis节点慢查询数量过多: ${node} ${slowlog_len}"
        fi
    done
}
# Deliver an alert message via Slack webhook (if configured) and local mail.
# $1 - alert text
send_alert() {
    local message="$1"
    if [[ -n "${SLACK_WEBHOOK_URL:-}" ]]; then
        # -sS: quiet but still surface errors; --max-time keeps the monitor
        # loop from hanging indefinitely on a dead webhook endpoint.
        curl -sS --max-time 10 -X POST "${SLACK_WEBHOOK_URL}" \
            -H 'Content-type: application/json' \
            -d "{\"text\":\"🚨 Redis Cluster Alert: ${message}\"}" || true
    fi
    if command -v mail &> /dev/null; then
        echo "${message}" | mail -s "Redis Cluster Alert" "${ALERT_EMAIL:-admin@company.com}" || true
    fi
    log "Alert sent: ${message}"
}
# Dump a full status snapshot into a timestamped report file under /tmp.
generate_report() {
    local report_file
    report_file="/tmp/redis_cluster_report_$(date +%Y%m%d_%H%M%S).txt"
    {
        printf '%s\n' "Redis集群监控报告"
        printf '%s\n' "生成时间: $(date)"
        printf '%s\n' "================================"
        printf '\n%s\n' "集群状态:"
        check_cluster_status
        printf '\n%s\n' "节点状态:"
        check_nodes_status
        printf '\n%s\n' "性能指标:"
        check_performance_metrics
    } > "${report_file}"
    log "监控报告已生成: ${report_file}"
}
# Main monitoring loop: run all checks every CHECK_INTERVAL seconds and
# emit a full report at the top of each hour.
main() {
    log "开始Redis集群监控"
    while true; do
        check_cluster_status
        check_nodes_status
        check_performance_metrics
        # Hourly report: trigger when the wall-clock minute is 00.
        local minute_now
        minute_now=$(date +%M)
        if [[ "${minute_now}" == "00" ]]; then
            generate_report
        fi
        log "监控周期完成,等待 ${CHECK_INTERVAL} 秒"
        sleep "${CHECK_INTERVAL}"
    done
}
# Graceful shutdown on SIGTERM/SIGINT.
cleanup() {
    log "监控程序退出"
    exit 0
}
trap cleanup SIGTERM SIGINT
# Start the monitor.
main "$@"
性能优化与最佳实践
Redis性能调优配置
# redis-performance.conf
# 网络优化
tcp-backlog 511
tcp-keepalive 300
timeout 0
# 内存优化
maxmemory-policy allkeys-lru
maxmemory-samples 5
# 数据结构优化
hash-max-ziplist-entries 512
hash-max-ziplist-value 64
list-max-ziplist-size -2
list-compress-depth 0
set-max-intset-entries 512
zset-max-ziplist-entries 128
zset-max-ziplist-value 64
# 持久化优化
save 900 1
save 300 10
save 60 10000
stop-writes-on-bgsave-error yes
rdbcompression yes
rdbchecksum yes
# AOF优化
appendonly yes
appendfsync everysec
no-appendfsync-on-rewrite no
auto-aof-rewrite-percentage 100
auto-aof-rewrite-min-size 64mb
# 复制优化
repl-diskless-sync no
repl-diskless-sync-delay 5
repl-ping-slave-period 10
repl-timeout 60
repl-disable-tcp-nodelay no
repl-backlog-size 1mb
repl-backlog-ttl 3600
# 客户端优化
maxclients 10000
client-output-buffer-limit normal 0 0 0
client-output-buffer-limit slave 256mb 64mb 60
client-output-buffer-limit pubsub 32mb 8mb 60
# 慢查询优化
slowlog-log-slower-than 10000
slowlog-max-len 128
# 集群优化
cluster-node-timeout 15000
cluster-require-full-coverage no
cluster-slave-validity-factor 10
cluster-migration-barrier 1
性能测试脚本
#!/bin/bash
# scripts/redis_performance_test.sh
# Benchmarks a Redis server (throughput, memory growth, latency) and
# renders an HTML report. All parameters are environment-overridable.
set -euo pipefail
# Connection / test parameters.
REDIS_HOST="${REDIS_HOST:-127.0.0.1}"
REDIS_PORT="${REDIS_PORT:-6379}"
REDIS_PASSWORD="${REDIS_PASSWORD:-}"
TEST_DURATION="${TEST_DURATION:-60}"
CONCURRENT_CLIENTS="${CONCURRENT_CLIENTS:-50}"
PIPELINE_SIZE="${PIPELINE_SIZE:-16}"
# Results directory: mktemp -d avoids the predictable /tmp path the old
# `mkdir -p /tmp/redis_performance_<ts>` created (symlink-attack prone).
RESULTS_DIR="$(mktemp -d "/tmp/redis_performance_$(date +%Y%m%d_%H%M%S)_XXXXXX")"
# Timestamped logger: echoes to the console and appends to the run's log file.
log() {
    local line="[$(date '+%Y-%m-%d %H:%M:%S')] $1"
    printf '%s\n' "${line}" | tee -a "${RESULTS_DIR}/test.log"
}
# Run one redis-benchmark case and record its CSV output.
# $1 - label for the result file; $2 - redis-benchmark -t test name.
run_benchmark() {
    local test_name="$1"
    local test_command="$2"
    log "开始测试: ${test_name}"
    # Auth as an array: the old unquoted ${auth_option} string relied on
    # word-splitting and broke on passwords containing whitespace.
    local auth_args=()
    if [[ -n "${REDIS_PASSWORD}" ]]; then
        auth_args=(-a "${REDIS_PASSWORD}")
    fi
    redis-benchmark \
        -h "${REDIS_HOST}" \
        -p "${REDIS_PORT}" \
        "${auth_args[@]}" \
        -c "${CONCURRENT_CLIENTS}" \
        -n 100000 \
        -d 100 \
        -P "${PIPELINE_SIZE}" \
        -t "${test_command}" \
        --csv > "${RESULTS_DIR}/${test_name}.csv"
    # Report the throughput column of the last CSV row.
    local ops_per_sec
    ops_per_sec=$(tail -1 "${RESULTS_DIR}/${test_name}.csv" | cut -d',' -f2 | tr -d '"')
    log "测试 ${test_name} 完成,QPS: ${ops_per_sec}"
}
# Memory-usage test: write 10k keys, measure growth, then clean up.
test_memory_usage() {
    log "开始内存使用测试"
    local auth_args=()
    if [[ -n "${REDIS_PASSWORD}" ]]; then
        auth_args=(-a "${REDIS_PASSWORD}")
    fi
    local initial_memory
    initial_memory=$(redis-cli -h "${REDIS_HOST}" -p "${REDIS_PORT}" "${auth_args[@]}" info memory | grep "used_memory:" | cut -d: -f2 | tr -d '\r')
    log "初始内存使用: ${initial_memory} bytes"
    # Stream all SET commands through a single redis-cli process instead of
    # forking one client per key (10000 execs -> 1; orders of magnitude faster).
    local stamp
    stamp=$(date +%s)
    for i in {1..10000}; do
        printf 'set test_key_%d test_value_%d_%s\n' "${i}" "${i}" "${stamp}"
    done | redis-cli -h "${REDIS_HOST}" -p "${REDIS_PORT}" "${auth_args[@]}" > /dev/null
    local after_write_memory
    after_write_memory=$(redis-cli -h "${REDIS_HOST}" -p "${REDIS_PORT}" "${auth_args[@]}" info memory | grep "used_memory:" | cut -d: -f2 | tr -d '\r')
    log "写入后内存使用: ${after_write_memory} bytes"
    local memory_growth
    memory_growth=$((after_write_memory - initial_memory))
    log "内存增长: ${memory_growth} bytes"
    # Batch the cleanup the same way.
    for i in {1..10000}; do
        printf 'del test_key_%d\n' "${i}"
    done | redis-cli -h "${REDIS_HOST}" -p "${REDIS_PORT}" "${auth_args[@]}" > /dev/null
    local after_cleanup_memory
    after_cleanup_memory=$(redis-cli -h "${REDIS_HOST}" -p "${REDIS_PORT}" "${auth_args[@]}" info memory | grep "used_memory:" | cut -d: -f2 | tr -d '\r')
    log "清理后内存使用: ${after_cleanup_memory} bytes"
}
# Latency test: sample with `redis-cli --latency-history` for 30s, then
# report the average of column 4 of the collected samples.
test_latency() {
    log "开始延迟测试"
    local auth_args=()
    if [[ -n "${REDIS_PASSWORD}" ]]; then
        auth_args=(-a "${REDIS_PASSWORD}")
    fi
    redis-cli -h "${REDIS_HOST}" -p "${REDIS_PORT}" "${auth_args[@]}" --latency-history -i 1 > "${RESULTS_DIR}/latency.log" &
    local latency_pid=$!
    sleep 30
    # Quote the PID and reap the background job so its status is consumed
    # (the old unquoted, unreaped kill left a zombie and an unchecked status).
    kill "${latency_pid}" 2>/dev/null || true
    wait "${latency_pid}" 2>/dev/null || true
    # -s (non-empty) plus an awk guard: the unguarded sum/count divided by
    # zero whenever the log was empty (e.g. server unreachable).
    if [[ -s "${RESULTS_DIR}/latency.log" ]]; then
        local avg_latency
        avg_latency=$(awk '{sum+=$4; count++} END {if (count > 0) print sum/count}' "${RESULTS_DIR}/latency.log")
        log "平均延迟: ${avg_latency} ms"
    fi
}
# Generate the HTML performance report.
# Fix: both here-docs used a quoted delimiter ('EOF'), so $(date),
# ${REDIS_HOST} and the embedded redis-cli calls were written to the HTML
# literally instead of being expanded; the system-info section also used an
# undefined ${auth_option}. Dynamic values are now computed up front and the
# here-doc delimiters are unquoted.
generate_performance_report() {
    local report_file="${RESULTS_DIR}/performance_report.html"
    local auth_args=()
    if [[ -n "${REDIS_PASSWORD}" ]]; then
        auth_args=(-a "${REDIS_PASSWORD}")
    fi
    # Collect server facts once; empty strings if the server is unreachable.
    local server_info memory_info redis_version os_name used_memory_human
    server_info=$(redis-cli -h "${REDIS_HOST}" -p "${REDIS_PORT}" "${auth_args[@]}" info server 2>/dev/null || echo "")
    memory_info=$(redis-cli -h "${REDIS_HOST}" -p "${REDIS_PORT}" "${auth_args[@]}" info memory 2>/dev/null || echo "")
    redis_version=$(awk -F: '$1=="redis_version"{gsub(/\r/,"",$2);print $2;exit}' <<< "${server_info}")
    os_name=$(awk '/^os:/{sub(/^os:/,"");gsub(/\r/,"");print;exit}' <<< "${server_info}")
    used_memory_human=$(awk -F: '$1=="used_memory_human"{gsub(/\r/,"",$2);print $2;exit}' <<< "${memory_info}")
    cat > "${report_file}" << EOF
<!DOCTYPE html>
<html>
<head>
<title>Redis性能测试报告</title>
<style>
body { font-family: Arial, sans-serif; margin: 20px; }
.header { background-color: #f0f0f0; padding: 20px; border-radius: 5px; }
.section { margin: 20px 0; }
.metric { background-color: #f9f9f9; padding: 10px; margin: 10px 0; border-left: 4px solid #007cba; }
table { border-collapse: collapse; width: 100%; }
th, td { border: 1px solid #ddd; padding: 8px; text-align: left; }
th { background-color: #f2f2f2; }
</style>
</head>
<body>
<div class="header">
<h1>Redis性能测试报告</h1>
<p>测试时间: $(date)</p>
<p>Redis服务器: ${REDIS_HOST}:${REDIS_PORT}</p>
<p>并发客户端: ${CONCURRENT_CLIENTS}</p>
<p>管道大小: ${PIPELINE_SIZE}</p>
</div>
<div class="section">
<h2>基准测试结果</h2>
<table>
<tr>
<th>测试项目</th>
<th>QPS</th>
<th>平均延迟(ms)</th>
</tr>
EOF
    # One table row per recorded benchmark CSV.
    local csv_file
    for csv_file in "${RESULTS_DIR}"/*.csv; do
        if [[ -f "${csv_file}" ]]; then
            local test_name qps latency
            test_name=$(basename "${csv_file}" .csv)
            qps=$(tail -1 "${csv_file}" | cut -d',' -f2 | tr -d '"')
            latency=$(tail -1 "${csv_file}" | cut -d',' -f3 | tr -d '"')
            echo "        <tr><td>${test_name}</td><td>${qps}</td><td>${latency}</td></tr>" >> "${report_file}"
        fi
    done
    cat >> "${report_file}" << EOF
</table>
</div>
<div class="section">
<h2>系统信息</h2>
<div class="metric">
<strong>Redis版本:</strong> ${redis_version}
</div>
<div class="metric">
<strong>操作系统:</strong> ${os_name}
</div>
<div class="metric">
<strong>内存使用:</strong> ${used_memory_human}
</div>
</div>
</body>
</html>
EOF
    log "性能报告已生成: ${report_file}"
}
# Entry point: run the full benchmark suite, then the memory and latency
# tests, and finally render the HTML report.
main() {
    log "开始Redis性能测试"
    # Benchmark suite — each label doubles as redis-benchmark's -t test name.
    local benchmarks=(
        set get incr
        lpush rpush lpop rpop
        sadd hset spop
        zadd zpopmin
        lrange_100 lrange_300 lrange_500 lrange_600
        mset
    )
    local bench
    for bench in "${benchmarks[@]}"; do
        run_benchmark "${bench}" "${bench}"
    done
    test_memory_usage
    test_latency
    generate_performance_report
    log "Redis性能测试完成,结果保存在: ${RESULTS_DIR}"
}
# Run it.
main "$@"
总结
Redis集群架构的设计和实践需要综合考虑业务需求、数据规模、性能要求等多个因素。本文详细介绍了从单机Redis到分布式集群的完整演进路径,包括:
关键要点
- 架构选择:根据业务规模选择合适的Redis部署模式
- 高可用设计:通过主从复制、哨兵模式、集群模式实现高可用
- 性能优化:合理配置参数,优化数据结构和持久化策略
- 监控运维:建立完善的监控体系,及时发现和解决问题
- 扩展策略:采用一致性哈希等算法实现平滑扩容
最佳实践
- 容量规划:提前规划内存使用,设置合理的淘汰策略
- 安全加固:启用密码认证,限制网络访问
- 备份策略:定期备份数据,测试恢复流程
- 性能监控:持续监控关键指标,优化慢查询
- 故障演练:定期进行故障演练,验证高可用机制
通过合理的架构设计和运维实践,Redis集群能够为企业提供稳定、高效的缓存和数据存储服务,支撑业务的快速发展。