您的位置首页  散文精选

不要告诉别人788df(788DF如何拆)

Redis集群报错cluster_state:fail,如何解决并重新恢复集群(IP问题/ slot未完全分配问题)报错127.0.0.

不要告诉别人788df(788DF如何拆)

 

Redis集群报错cluster_state:fail,如何解决并重新恢复集群(IP问题/slot未完全分配问题)

报错

127.0.0.1:6379> set name tom ---》测试在redis集群中存数据时报错
-> Redirected to slot [5798] located at 192.168.3.2:6379
(error) CLUSTERDOWN The cluster is down
192.168.3.2:6379> cluster info
cluster_state:fail ---》显示集群状态已关闭
cluster_slots_assigned:16384
cluster_slots_ok:10923
cluster_slots_pfail:0
cluster_slots_fail:5461
cluster_known_nodes:6
cluster_size:3
cluster_current_epoch:6
cluster_my_epoch:2
cluster_stats_messages_ping_sent:2203
cluster_stats_messages_pong_sent:392
cluster_stats_messages_meet_sent:4
cluster_stats_messages_fail_sent:4
cluster_stats_messages_sent:2603
cluster_stats_messages_ping_received:391
cluster_stats_messages_pong_received:310
cluster_stats_messages_meet_received:1
cluster_stats_messages_fail_received:1
cluster_stats_messages_received:703

解决

查看所有redis日志,发现redis-5容器一直在反复连接192.168.3.1:6379这个master节点(node-1)

[root@es-node22

~]# docker logs -f redis-5
......
1:S 28 May 2022 13:07:53.233 # Cluster state changed: fail
1:S 28 May 2022 13:07:53.442 * Connecting to MASTER 192.168.3.1:6379
1:S 28 May 2022 13:07:53.442 * MASTER <-> REPLICA sync started
1:S 28 May 2022 13:07:53.442 # Error condition on socket for SYNC: Connection refused
1:S 28 May 2022 13:07:54.481 * Connecting to MASTER 192.168.3.1:6379
1:S 28 May 2022 13:07:54.481 * MASTER <-> REPLICA sync started
......

查看node-1的redis节点配置文件redis.conf中的节点IP

[root@es-node22 ~]# cat /root/redis/node-1/conf/redis.conf
port 6379
bind 0.0.0.0
cluster-enabled yes
cluster-config-file nodes.conf ---》redis集群节点配置文件
cluster-node-timeout 5000
cluster-announce-ip 192.168.3.11 ---》可以看到node-1节点配置文件中IP为192.168.3.11
cluster-announce-port 6379
cluster-announce-bus-port 16379
appendonly yes

查看当前的redis集群状态,以数组形式展示

192.168.3.2

:6379>clusterslots---》当前的集群状态,以数组形式展示1)1)(integer)109232)(integer)163833)1)"192.168.3.3"2)(integer)6379

3)"ff0d1d636f94d9b092e6012408c1d0918e00e6ed"4)1)"192.168.3.4"2)(integer)63793)"2113cf366ad27ebd73585f03d368e77f03b1a2e1"

2) 1) (integer) 0
   2) (integer) 5460
   3) 1) "192.168.3.1" ---》可以看到集群中该节点的IP是192.168.3.1
      2) (integer) 6379
      3) "c856c94ba8d2c55a0d176831bc85aa34a96fde88"

4)1)"192.168.3.5"2)(integer)63793)"d92ff5984ab29370af0adeaca71e7938c0287ca5"3)1)(integer)54612)(integer)

109223)1)"192.168.3.2"2)(integer)63793)"8b01b1bc6202e1dc7ff9f15013d8200b10ecb3f3"4)1)"192.168.3.6"2)(integer)

63793)"2108a90495c147c675328f9b8b4fa49e2b856faf"查看redis集群节点配置文件nodes.conf[root@es-node22~]#cat/root/redis/node-1/data/nodes.conf

c856c94ba8d2c55a0d176831bc85aa34a96fde88192.168.3.1:6379@16379myself,master-016537432660001connected0

-5460d92ff5984ab29370af0adeaca71e7938c0287ca5192.168.3.5:6379@16379slavec856c94ba8d2c55a0d176831bc85aa34a96fde88

016537432740005connected2108a90495c147c675328f9b8b4fa49e2b856faf192.168.3.6:6379@16379slave8b01b1bc6202e1dc7ff9f15013d8200b10ecb3f3

016537432755316connected2113cf366ad27ebd73585f03d368e77f03b1a2e1192.168.3.4:6379@16379slaveff0d1d636f94d9b092e6012408c1d0918e00e6ed

016537432755314connected8b01b1bc6202e1dc7ff9f15013d8200b10ecb3f3192.168.3.2:6379@16379master-01653743275531

2connected5461-10922ff0d1d636f94d9b092e6012408c1d0918e00e6ed192.168.3.3:6379@16379master-01653743275000

3connected10923-16383varscurrentEpoch6lastVoteEpoch0[root@es-node22~]#cat/root/redis/node-2/data/nodes.conf

ff0d1d636f94d9b092e6012408c1d0918e00e6ed192.168.3.3:6379@16379master-016537432732333connected10923-16383

2113cf366ad27ebd73585f03d368e77f03b1a2e1192.168.3.4:6379@16379slaveff0d1d636f94d9b092e6012408c1d0918e00e6ed

016537432711514connectedc856c94ba8d2c55a0d176831bc85aa34a96fde88192.168.3.1:6379@16379master,fail-1653743267074

16537432669611connected0-5460d92ff5984ab29370af0adeaca71e7938c0287ca5192.168.3.5:6379@16379slavec856c94ba8d2c55a0d176831bc85aa34a96fde88

016537432720001connected8b01b1bc6202e1dc7ff9f15013d8200b10ecb3f3192.168.3.2:6379@16379myself,master-0

16537432710002connected5461-109222108a90495c147c675328f9b8b4fa49e2b856faf192.168.3.6:6379@16379slave8b01b1bc6202e1dc7ff9f15013d8200b10ecb3f3

0 1653743272194 6 connected
vars currentEpoch 6 lastVoteEpoch 0

可以看到,redis所有节点的集群配置文件nodes.conf中node-1的地址都是192.168.3.1:6379,与node-1节点redis.conf文件中配置的cluster-announce-ip(192.168.3.11)不一致。

批量修改所有redis节点nodes.conf文件中该节点IP配置

[root@es-node22 ~]# for i in $(seq 1 6); do \
> sed -i s/192.168.3.1/192.168.3.11/ /root/redis/node-${i}/data/nodes.conf
> done

查看修改后的所有redis集群nodes.conf文件

[root@es-node22 ~]# cat /root/redis/node-1/data/nodes.conf

c856c94ba8d2c55a0d176831bc85aa34a96fde88192.168.3.11:6379@16379myself,master-016537432660001connected

0-5460d92ff5984ab29370af0adeaca71e7938c0287ca5192.168.3.5:6379@16379slavec856c94ba8d2c55a0d176831bc85aa34a96fde88

016537432740005connected2108a90495c147c675328f9b8b4fa49e2b856faf192.168.3.6:6379@16379slave8b01b1bc6202e1dc7ff9f15013d8200b10ecb3f3

016537432755316connected2113cf366ad27ebd73585f03d368e77f03b1a2e1192.168.3.4:6379@16379slaveff0d1d636f94d9b092e6012408c1d0918e00e6ed

016537432755314connected8b01b1bc6202e1dc7ff9f15013d8200b10ecb3f3192.168.3.2:6379@16379master-01653743275531

2connected5461-10922ff0d1d636f94d9b092e6012408c1d0918e00e6ed192.168.3.3:6379@16379master-01653743275000

3connected10923-16383varscurrentEpoch6lastVoteEpoch0[root@es-node22~]#cat/root/redis/node-2/data/nodes.conf

ff0d1d636f94d9b092e6012408c1d0918e00e6ed192.168.3.3:6379@16379master-016537432732333connected10923-16383

2113cf366ad27ebd73585f03d368e77f03b1a2e1192.168.3.4:6379@16379slaveff0d1d636f94d9b092e6012408c1d0918e00e6ed

016537432711514connectedc856c94ba8d2c55a0d176831bc85aa34a96fde88192.168.3.11:6379@16379master,fail-1653743267074

16537432669611connected0-5460d92ff5984ab29370af0adeaca71e7938c0287ca5192.168.3.5:6379@16379slavec856c94ba8d2c55a0d176831bc85aa34a96fde88

016537432720001connected8b01b1bc6202e1dc7ff9f15013d8200b10ecb3f3192.168.3.2:6379@16379myself,master-0

16537432710002connected5461-109222108a90495c147c675328f9b8b4fa49e2b856faf192.168.3.6:6379@16379slave8b01b1bc6202e1dc7ff9f15013d8200b10ecb3f3

0 1653743272194 6 connected
vars currentEpoch 6 lastVoteEpoch 0
......

批量重启redis集群所有节点容器

[root@es-node22 ~]# docker restart $(docker ps | grep redis | awk '{print $1}')
dcd802a160c6
6e2f628457f6
f05d3dfb9c8b
220df78836e9
31e7b232f1d1
1de91b4d4e68
[root@es-node22 ~]# docker ps
CONTAINER ID IMAGE COMMAND CREATED STATUS PORTS NAMES
6e2f628457f6 redis:5.0.9-alpine3.11

"docker-entrypoint.s…"3hoursagoUp2hours0.0.0.0:6376->6379/tcp,:::6376->6379/tcp,0.0.0.0:16376->16379/tcp,

:::16376->16379/tcpredis-6f05d3dfb9c8bredis:5.0.9-alpine3.11"docker-entrypoint.s…"3hoursagoUp2hours0.0

.0.0:6375->6379/tcp,:::6375->6379/tcp,0.0.0.0:16375->16379/tcp,:::16375->16379/tcpredis-5220df78836e9

redis:5.0.9-alpine3.11"docker-entrypoint.s…"3hoursagoUp2hours0.0.0.0:6374->6379/tcp,:::6374->6379/tcp,

0.0.0.0:16374->16379/tcp,:::16374->16379/tcpredis-431e7b232f1d1redis:5.0.9-alpine3.11"docker-entrypoint.s…"

3hoursagoUp2hours0.0.0.0:6373->6379/tcp,:::6373->6379/tcp,0.0.0.0:16373->16379/tcp,:::16373->16379/tcp

redis-31de91b4d4e68redis:5.0.9-alpine3.11"docker-entrypoint.s…"3hoursagoUp2hours0.0.0.0:6372->6379/tcp,

:::6372->6379/tcp,0.0.0.0:16372->16379/tcp,:::16372->16379/tcpredis-2dcd802a160c6redis:5.0.9-alpine3.11

"docker-entrypoint.s…"3hoursagoUp2hours0.0.0.0:6371->6379/tcp,:::6371->6379/tcp,0.0.0.0:16371->16379/tcp,

:::16371->16379/tcp redis-1

重新查看redis集群状态

[root@es-node22 ~]# docker exec -it redis-1 /bin/sh ---》redis容器(alpine镜像)中默认没有bash解释器
/data # redis-cli -c
127.0.0.1:6379> cluster info
cluster_state:ok ---》可以看到redis集群状态已经为OK
cluster_slots_assigned:16384

cluster_slots_ok:16384cluster_slots_pfail:0cluster_slots_fail:0cluster_known_nodes:6cluster_size:3cluster_current_epoch:6

cluster_my_epoch:1cluster_stats_messages_ping_sent:236cluster_stats_messages_pong_sent:233cluster_stats_messages_sent:469

cluster_stats_messages_ping_received:233cluster_stats_messages_pong_received:232cluster_stats_messages_received:465

127.0.0.1:6379>clusternodesc856c94ba8d2c55a0d176831bc85aa34a96fde88192.168.3.11:6379@16379master-01653752958838

1connected0-54608b01b1bc6202e1dc7ff9f15013d8200b10ecb3f3192.168.3.2:6379@16379myself,master-01653752957000

2connected5461-109222113cf366ad27ebd73585f03d368e77f03b1a2e1192.168.3.4:6379@16379slaveff0d1d636f94d9b092e6012408c1d0918e00e6ed

016537529578044connected2108a90495c147c675328f9b8b4fa49e2b856faf192.168.3.6:6379@16379slave8b01b1bc6202e1dc7ff9f15013d8200b10ecb3f3

016537529570866connectedff0d1d636f94d9b092e6012408c1d0918e00e6ed192.168.3.3:6379@16379master-01653752958000

3connected10923-16383d92ff5984ab29370af0adeaca71e7938c0287ca5192.168.3.5:6379@16379slavec856c94ba8d2c55a0d176831bc85aa34a96fde88

016537529585291connected127.0.0.1:6379>clusterslots1)1)(integer)54612)(integer)109223)1)"192.168.3.2"

2)(integer)63793)"8b01b1bc6202e1dc7ff9f15013d8200b10ecb3f3"4)1)"192.168.3.6"2)(integer)63793)"2108a90495c147c675328f9b8b4fa49e2b856faf"

2) 1) (integer) 0
   2) (integer) 5460
   3) 1) "192.168.3.11" ---》可以看到集群中该节点的IP已经为修改后的IP
      2) (integer) 6379
      3) "c856c94ba8d2c55a0d176831bc85aa34a96fde88"

4)1)"192.168.3.5"2)(integer)63793)"d92ff5984ab29370af0adeaca71e7938c0287ca5"3)1)(integer)109232)(integer)

163833)1)"192.168.3.3"2)(integer)63793)"ff0d1d636f94d9b092e6012408c1d0918e00e6ed"4)1)"192.168.3.4"2)(integer)

6379
3) "2113cf366ad27ebd73585f03d368e77f03b1a2e1"

另一种情况

当集群报错cluster_state:fail时,也有可能是因为slot未完全分配的问题导致集群不可用。

redis为了保证集群完整性,默认情况下(cluster-require-full-coverage yes),当集群16384个槽中有任何一个没有指派到节点时,整个redis集群都会不可用。这是对集群完整性的一种保护措施,保证所有的槽都指派给在线的redis节点。遇到这种情况时,重新分配这些slots即可解决集群不可用问题。

如果您喜欢本文,就请动动您的发财手为本文点赞评论转发,让我们一起学习更多运维相关知识,最后请记得关注我。

免责声明:本站所有信息均搜集自互联网,并不代表本站观点,本站不对其真实合法性负责。如有信息侵犯了您的权益,请告知,本站将立刻处理。联系QQ:1640731186