在生产环境中不会使用单机裸奔的数据库,而云厂商RDS等高可用数据库性价比较低,因此很多场景需要我们使用实例自建集群。本章介绍如何从0开始搭建redis7三主三从基础集群环境,并进行故障迁移模拟测试。
预备姿势
- vm模拟环境
- vmware
- CentOS 7.9 //关闭防火墙与selinux
- redis7 | 6 均可
- xshell或其他ssh工具
- 阿里云环境
- 六台ECS
- 集群最低三个主节点 ,也可单机模仿多实例
- 配置防火墙或iptables规则,内网开启6379、7000以及集群总线端口17000(集群端口+10000,节点间通信使用)
- 配置 集群生产环境网络延迟应低于 2ms
- 配置安全组 需开启对应端口
- xshell或其他ssh工具
set k1 v1
    |
    v
CRC16算法:slot = CRC16(key) mod 16384
    |
    v
数据槽位(0-16383)
    |
    v
MASTER      MASTER      MASTER
  |           |           |
  v           v           v
SLAVE       SLAVE       SLAVE
准备分发脚本 distributeFile.sh
# 建议配置ssh免密或ansible工具快速实现,此处为快速搭建直接使用分发脚本,且该脚本复用率很高,也可以使用公有云运维编排等工具
# 准备内网ip文件或写入/etc/hosts文件
# 内网环境此处示例使用简单脚本分发文件
[root@iZ0jlapur4hqjd112w7dscZ /]# cat hosts_file
172.16.1.24
172.16.1.26
172.16.1.27
172.16.1.28
172.16.1.29
172.16.1.30
#!/bin/bash
# distributeFile.sh
# Cluster distribution script: pushes files (config files, tarballs, ...)
# to every host of the cluster with sshpass + rsync.
#
# Usage: sh distributeFile.sh   (then answer the "src" and "dest" prompts)
# Env:   SSHPASS - root password of the target hosts; overrides the
#                  built-in default below.
# Exit:  0 when every host was updated, 1 when any host failed.

# Host list taken from a hand-written IP file (whitespace separated).
IP=$(cat /hosts_file)
# Alternative: take the addresses from the hosts file
# IP=$(awk '{print $1}' /etc/hosts | grep -Ev '^$' | grep '^[0-9]'| tr '\n' ' ')
if [ -z "$IP" ]; then
  echo "no hosts found in /hosts_file" >&2
  exit 1
fi

# Host password. It is handed to sshpass through the SSHPASS environment
# variable (sshpass -e) instead of "-p", so it does not appear in the
# process list. Export SSHPASS before running to replace the default.
DEFAULT_PW='Thmm000000!'
SSHPASS=${SSHPASS:-$DEFAULT_PW}
export SSHPASS

read -r -p "src" src
read -r -p "dest" dest
if [ -z "$src" ] || [ -z "$dest" ]; then
  echo "both src and dest must be given" >&2
  exit 1
fi

failed=0
for i in $IP; do
  echo "${i}"
  # Run one ssh first so the host key gets accepted; otherwise the later
  # rsync is refused by an unknown host.
  if ! sshpass -e ssh -o StrictHostKeyChecking=no "root@${i}" "hostname"; then
    echo "${i}: ssh failed, host skipped" >&2
    failed=1
    continue
  fi
  # $src is the source file, $dest the target directory on the remote host.
  # $src stays unquoted on purpose: several paths or a glob may be typed
  # at the prompt. The remote user is root, the same as for the ssh above.
  # shellcheck disable=SC2086
  if ! sshpass -e rsync -av -- $src "root@${i}:${dest}"; then
    echo "${i}: rsync failed" >&2
    failed=1
  fi
done
exit "$failed"
编译安装参考单节点搭建
redis.conf修改项
# Turn protected mode off
protected-mode no
# Port to listen on
port 6379
# Start as a daemon (i.e. in the background, not interrupted by ordinary operations)
daemonize yes
# Addresses to bind; "xxx" are placeholders to fill in.
# NOTE(review): the original note said to put the client IP here. Per the
# Redis configuration reference, bind lists the server's own interface
# addresses to listen on - confirm for your setup.
# For a throwaway lab setup this line can simply be commented out.
bind xxx xxx
# Enable RDB persistence
# save "<seconds> <number of changes>"
save "xxx xxx"
# Enable AOF persistence
appendonly yes
# Set the password; PWD stands for the password
requirepass PWD
# Log file path
logfile /var/log/redis7_6379.log
单节点添加系统管理
非必选项,少部分版本需要添加,个人认为和安装方式有关
添加至 /lib/systemd/system/redis.service
或 /etc/systemd/system/redis.service
方便后期systemctl服务管理
# 示例文件可根据需求更改 启动命令为必选项
[Unit]
Description=redis
After=network.target
[Service]
# redis.conf sets "daemonize yes", so the server forks into the background.
Type=forking
# NOTE(review): with Type=forking this path should be the same as the
# "pidfile" directive in redis.conf - confirm for your install.
PIDFile=/run/redis/redis.pid
ExecStart=/usr/local/redis/bin/redis-server /usr/local/redis/etc/redis.conf
# NOTE(review): Redis is not documented to reload its configuration on
# SIGHUP, so "systemctl reload" presumably has no effect - verify.
ExecReload=/bin/kill -s HUP $MAINPID
# SIGTERM is the signal Redis handles as a graceful shutdown request.
# SIGQUIT (used before) has no such handler in Redis.
ExecStop=/bin/kill -s TERM $MAINPID
PrivateTmp=true
[Install]
WantedBy=multi-user.target
单机测试启动redis 服务
# 部分版本服务名为redis-server
systemctl daemon-reload
systemctl start redis.service
分发服务文件
sh distributeFile.sh
src/etc/systemd/system/redis.service
dest/etc/systemd/system/
集群命令脚本 clusterCmd.sh
#!/bin/bash
# clusterCmd.sh
# Runs one command on every host of the cluster over ssh.
#
# Usage: sh clusterCmd.sh "<command>"
# Env:   SSHPASS - root password of the target hosts; overrides the
#                  built-in default below.
# Exit:  0 when the command succeeded on every host, 1 when any host
#        failed, 2 on wrong usage.

if [ "$#" -lt 1 ] || [ -z "$1" ]; then
  echo "usage: sh clusterCmd.sh \"<command>\"" >&2
  exit 2
fi

# Host list taken from a hand-written IP file (whitespace separated).
IP=$(cat /hosts_file)
# The hosts file can be used instead
# IP=$(awk '{print $1}' /etc/hosts | grep -Ev '^$' | grep '^[0-9]'| tr '\n' ' ')
if [ -z "$IP" ]; then
  echo "no hosts found in /hosts_file" >&2
  exit 1
fi

# Host password. It is handed to sshpass through the SSHPASS environment
# variable (sshpass -e) instead of "-p", so it does not appear in the
# process list. Export SSHPASS before running to replace the default.
DEFAULT_PW='Thmm000000!'
SSHPASS=${SSHPASS:-$DEFAULT_PW}
export SSHPASS

failed=0
for i in $IP; do
  # Double quotes so that ${i} is expanded; the single-quoted form printed
  # the literal text ${i} instead of the host.
  echo "${i}=====started"
  if sshpass -e ssh -o StrictHostKeyChecking=no "root@${i}" "$1"; then
    echo "${i}=====OK"
  else
    # Report the failure instead of printing OK unconditionally.
    echo "${i}=====FAILED" >&2
    failed=1
  fi
done
exit "$failed"
##################################################
# 测试安装 群起redis
[root@iZ0jlapur4hqjd112w7dscZ ~]# sh clusterCmd.sh "systemctl start redis "
172.16.1.24
172.16.1.26
172.16.1.27
172.16.1.28
172.16.1.29
172.16.1.30
集群配置
# cluster.conf - minimal configuration file for starting a cluster node; it does not exist by default
# Port to listen on (a single port, not a range)
port 7000
# Enable cluster mode
cluster-enabled yes
# File in which the node itself records cluster state changes once cluster mode is on
cluster-config-file nodes.conf
# Node communication timeout
cluster-node-timeout 5000
# Protected mode
protected-mode no
# Password
requirepass "123456"
# Password a replica uses to authenticate to its master. Because requirepass
# is set on every node, replication needs the same value here.
masterauth "123456"
# Run in the background as a daemon
daemonize yes
启动集群
sh clusterCmd.sh "/usr/local/redis/bin/redis-server /usr/local/redis/etc/cluster.conf"
# 查看端口进程
ps -ef | grep redis
ss -nutlp | grep 7000
[root@iZ0jlapur4hqjd112w7dscZ ~]# redis-cli -p 7000 -a 123456
Warning: Using a password with '-a' or '-u' option on the command line interface may not be safe.
127.0.0.1:7000> ping
PONG
# 测试通过单机启动成功
初始化集群
# --cluster create构建集群
# --cluster-replicas 1 每个MASTER一个SLAVE
# 注意集群最小条件三台MASTER 三主三从为集群高可用
[root@iZ0jlfj6e8oorlm53wxj4dZ ~]# redis-cli -a 123456 --cluster create 172.16.1.31:7000 172.16.1.32:7000 172.16.1.33:7000 172.16.1.34:7000 172.16.1.35:7000 172.16.1.36:7000 --cluster-replicas 1
Warning: Using a password with '-a' or '-u' option on the command line interface may not be safe.
>>> Performing hash slots allocation on 6 nodes...
Master[0] -> Slots 0 - 5460
Master[1] -> Slots 5461 - 10922
Master[2] -> Slots 10923 - 16383
Adding replica 172.16.1.35:7000 to 172.16.1.31:7000
Adding replica 172.16.1.36:7000 to 172.16.1.32:7000
Adding replica 172.16.1.34:7000 to 172.16.1.33:7000
M: 4297fd29b3f718b8646b7241b021f5fe56a94fae 172.16.1.31:7000
slots:[0-5460] (5461 slots) master
M: 1a4dfa6a5b4b2f5be130495c9fb9d377b25c27e2 172.16.1.32:7000
slots:[5461-10922] (5462 slots) master
M: 677cfce4484419c2615f16fb76cd810125349c18 172.16.1.33:7000
slots:[10923-16383] (5461 slots) master
S: e33bdd68c53afb8ada4556007d26eafe3e876445 172.16.1.34:7000
replicates 677cfce4484419c2615f16fb76cd810125349c18
S: 621e5c74b780402aa3970c9db19d93c09fef72ce 172.16.1.35:7000
replicates 4297fd29b3f718b8646b7241b021f5fe56a94fae
S: 1073d36c622dca0ac1d6b8110a268fdd1caf4afd 172.16.1.36:7000
replicates 1a4dfa6a5b4b2f5be130495c9fb9d377b25c27e2
Can I set the above configuration? (type 'yes' to accept): yes
>>> Nodes configuration updated
>>> Assign a different config epoch to each node
>>> Sending CLUSTER MEET messages to join the cluster
Waiting for the cluster to join
.
>>> Performing Cluster Check (using node 172.16.1.31:7000)
M: 4297fd29b3f718b8646b7241b021f5fe56a94fae 172.16.1.31:7000
slots:[0-5460] (5461 slots) master
1 additional replica(s)
S: e33bdd68c53afb8ada4556007d26eafe3e876445 172.16.1.34:7000
slots: (0 slots) slave
replicates 677cfce4484419c2615f16fb76cd810125349c18
S: 1073d36c622dca0ac1d6b8110a268fdd1caf4afd 172.16.1.36:7000
slots: (0 slots) slave
replicates 1a4dfa6a5b4b2f5be130495c9fb9d377b25c27e2
M: 677cfce4484419c2615f16fb76cd810125349c18 172.16.1.33:7000
slots:[10923-16383] (5461 slots) master
1 additional replica(s)
S: 621e5c74b780402aa3970c9db19d93c09fef72ce 172.16.1.35:7000
slots: (0 slots) slave
replicates 4297fd29b3f718b8646b7241b021f5fe56a94fae
M: 1a4dfa6a5b4b2f5be130495c9fb9d377b25c27e2 172.16.1.32:7000
slots:[5461-10922] (5462 slots) master
1 additional replica(s)
[OK] All nodes agree about slots configuration.
>>> Check for open slots...
>>> Check slots coverage...
[OK] All 16384 slots covered.
# 集群为我们自动分配槽位,构建了三个MASTER节点和三个SLAVE节点
# 三个MASTER分别持有5461、5462、5461个槽,共计16384个
数据路由到对应槽位 -c
# 我们通过cli连接服务端。集群对key做CRC16运算后对16384取模得到槽位,同一个key始终落在同一个槽位。
# 槽位由哪个节点负责取决于集群的槽位分配,因此写入的key不一定落在当前连接的节点上。
# 此时需要连接到负责该槽位的节点,或使用-c参数让redis-cli自动跟随重定向,才能完成增删改查操作。
[root@iZ0jlfj6e8oorlm53wxj4dZ ~]# redis-cli -a 123456 -p 7000
Warning: Using a password with '-a' or '-u' option on the command line interface may not be safe.
127.0.0.1:7000> ping
PONG
127.0.0.1:7000> set k1 v1
(error) MOVED 12706 172.16.1.33:7000 # 未加-c:k1所在的槽位12706由172.16.1.33负责,redis-cli不会自动跟随重定向,需改连该节点或加-c参数
127.0.0.1:7000> set k2 v2
OK
添加-c 参数后我们发现ip在跟随槽位变动
[root@iZ0jlfj6e8oorlm53wxj4dZ ~]# redis-cli -a 123456 -p 7000 -c
Warning: Using a password with '-a' or '-u' option on the command line interface may not be safe.
127.0.0.1:7000> set k1 v1 #注意观察ip变化
-> Redirected to slot [12706] located at 172.16.1.33:7000
OK
172.16.1.33:7000> set k2 v2
-> Redirected to slot [449] located at 172.16.1.31:7000
OK
172.16.1.31:7000>
下图槽位落点函数 cluster.c 中的 keyHashSlot()
/* -----------------------------------------------------------------------------
* Key space handling
* -------------------------------------------------------------------------- */
/* We have 16384 hash slots. The hash slot of a given key is obtained
* as the least significant 14 bits of the crc16 of the key.
*
* However if the key contains the {...} pattern, only the part between
* { and } is hashed. This may be useful in the future to force certain
* keys to be in the same node (assuming no resharding is in progress). */
unsigned int keyHashSlot(char *key, int keylen) {
// In cluster mode a {...} section in the key is often used to group a data set:
// only the part between { and } is hashed, so such keys get the same slot,
// which keeps a large data set together and makes it easier to query.
// crc16() is not shown in this excerpt; its result is masked with 0x3FFF
// below, i.e. only the low 14 bits are kept, giving a slot in 0..16383.
int s, e; /* start-end indexes of { and } */
for (s = 0; s < keylen; s++)
if (key[s] == '{') break;
/* No '{' ? Hash the whole key. This is the base case. */
if (s == keylen) return crc16(key,keylen) & 0x3FFF;
/* '{' found? Check if we have the corresponding '}'. */
for (e = s+1; e < keylen; e++)
if (key[e] == '}') break;
/* No '}' or nothing between {} ? Hash the whole key. */
if (e == keylen || e == s+1) return crc16(key,keylen) & 0x3FFF;
/* If we are here there is both a { and a } on its right. Hash
* what is in the middle between { and }. */
return crc16(key+s+1,e-s-1) & 0x3FFF;
}
集群命令
cluster nodes # 查看节点关键信息
127.0.0.1:7000> cluster nodes
e33bdd68c53afb8ada4556007d26eafe3e876445 172.16.1.34:7000@17000 slave 677cfce4484419c2615f16fb76cd810125349c18 0 1689906296000 4 connected
1073d36c622dca0ac1d6b8110a268fdd1caf4afd 172.16.1.36:7000@17000 slave 1a4dfa6a5b4b2f5be130495c9fb9d377b25c27e2 0 1689906297000 6 connected
677cfce4484419c2615f16fb76cd810125349c18 172.16.1.33:7000@17000 master - 0 1689906297651 3 connected 10923-16383
621e5c74b780402aa3970c9db19d93c09fef72ce 172.16.1.35:7000@17000 slave 4297fd29b3f718b8646b7241b021f5fe56a94fae 0 1689906297151 5 connected
1a4dfa6a5b4b2f5be130495c9fb9d377b25c27e2 172.16.1.32:7000@17000 master - 0 1689906296000 2 connected 5461-10922
4297fd29b3f718b8646b7241b021f5fe56a94fae 172.16.1.31:7000@17000 myself,master - 0 1689906296000 1 connected 0-5460
cluster info # 查看集群主要信息,如状态,节点数,通信情况等
127.0.0.1:7000> cluster info
cluster_state:ok
cluster_slots_assigned:16384
cluster_slots_ok:16384
cluster_slots_pfail:0
cluster_slots_fail:0
cluster_known_nodes:6
cluster_size:3
cluster_current_epoch:6
cluster_my_epoch:1
cluster_stats_messages_ping_sent:6320
cluster_stats_messages_pong_sent:6283
cluster_stats_messages_sent:12603
cluster_stats_messages_ping_received:6278
cluster_stats_messages_pong_received:6320
cluster_stats_messages_meet_received:5
cluster_stats_messages_received:12603
cluster slots #集群节点id和槽点映射信息
127.0.0.1:7000> cluster slots
1) 1) (integer) 10923
2) (integer) 16383
3) 1) "172.16.1.33"
2) (integer) 7000
3) "677cfce4484419c2615f16fb76cd810125349c18"
4) 1) "172.16.1.34"
2) (integer) 7000
3) "e33bdd68c53afb8ada4556007d26eafe3e876445"
2) 1) (integer) 5461
2) (integer) 10922
3) 1) "172.16.1.32"
2) (integer) 7000
3) "1a4dfa6a5b4b2f5be130495c9fb9d377b25c27e2"
4) 1) "172.16.1.36"
2) (integer) 7000
3) "1073d36c622dca0ac1d6b8110a268fdd1caf4afd"
3) 1) (integer) 0
2) (integer) 5460
3) 1) "172.16.1.31"
2) (integer) 7000
3) "4297fd29b3f718b8646b7241b021f5fe56a94fae"
4) 1) "172.16.1.35"
2) (integer) 7000
3) "621e5c74b780402aa3970c9db19d93c09fef72ce"
cluster countkeysinslot <slot> # 查看指定槽位中key的数量(只统计当前连接节点上的数据)
127.0.0.1:7000> cluster countkeysinslot 10
(integer) 0
下章 节点变更与扩缩容
标签:部署,redis,cluster,集群,172.16,7000,slots From: https://www.cnblogs.com/tomlong/p/17833163.html