GFS 升级失败(GFS 磁盘盘符发生变化)时使用的恢复脚本:
#!/bin/bash
# Recovery helper for a GlusterFS (GFS) upgrade that failed because GFS disk
# device names changed.
# Usage: <script> start|end <upgrade-package-name> [check-scripts-dir]
# External tools used by this file: kubectl, curl, jq, sshpass/ssh/scp,
# zip/unzip, base64, sed.
# TODO: still missing validation of the device argument format, of device
# existence and of the node format.
curDir=$(dirname "$(readlink -f "$0")")
Dir=$(dirname "$curDir")
NODE_NAMES=$(kubectl get node --selector="master" | grep -v "NAME" | grep master | awk '{print $1}' | tr "\n" " ")
# shellcheck disable=SC2206  # word splitting into array elements is intended
ARRAY_NAMES=($NODE_NAMES)
NAMESPACES=glusterfs-example

# Count the nodes whose role is "Master" in the current environment.
# NOTE: the kubectl output format is spelled "jsonpath" (all lower case).
TOKEN=$(kubectl get configmap matrix -n matrix -o jsonpath='{.data.MATRIX_INTERNAL_TOKEN}')
IN_VIP=$(kubectl get configmap matrix -n matrix -o jsonpath='{.data.MATRIX_INTERNAL_VIP}')
MATRIX_SECURE_PORT=$(kubectl get cm matrix -n matrix -o jsonpath='{.data.MATRIX_SECURE_PORT}')
# The variable written here must be the one read for VIP_MODE below.
MATRIX_SYSTEMCONFIG=$(curl -s -k -H "X-Auth-Token:${TOKEN}" https://localhost:8443/matrix/rsapi/v1.0/cluster/systemconfig)
VIP_MODE=$(printf '%s' "${MATRIX_SYSTEMCONFIG}" | jq -r ".vipMode")
if [[ ${VIP_MODE} == "EXTERNAL" ]]; then
  # With an external VIP the internal API is reached through the leader service.
  IN_VIP=$(kubectl get svc matrix-leader-service -n matrix -o jsonpath='{.spec.clusterIP}')
fi
if [[ $IN_VIP =~ ":" ]]; then
  # A colon means an IPv6 literal, which needs brackets inside a URL.
  IN_VIP="\[$IN_VIP\]"
fi
NODES_INFO=$(curl -s -k -H "X-Auth-Token:${TOKEN}" "https://${IN_VIP}:${MATRIX_SECURE_PORT}/matrix/rsapi/v1.0/cluster/nodes")
NODES_NUM=$(printf '%s' "${NODES_INFO}" | jq length)
MASTER_NODE_NUMBER=0
# ${NODES_NUM:-0}: an empty value (failed request) must not break the loop.
for ((i = 0; i < ${NODES_NUM:-0}; i++)); do
  role=$(printf '%s' "${NODES_INFO}" | jq -r ".[$i].nodeBaseInfo.role")
  echo "the node role is $role"
  if [[ ${role} == "Master" ]]; then
    ((MASTER_NODE_NUMBER += 1))
  fi
done
echo "the number of master is $MASTER_NODE_NUMBER"

# Matrix node information. The token and port are the same configmap keys that
# were already read above, so they are reused instead of queried again.
matrix_token=${TOKEN}
matrix_port=${MATRIX_SECURE_PORT}
ssh_port_result=$(curl -s -g --header "Content-Type:application/json" -k --header "X-Auth-Token:${matrix_token}" --request GET --url "https://localhost:${matrix_port}/matrix/rsapi/v1.0/cluster/ssh_port" --retry 3 --retry-delay 10 --max-time 5 --connect-timeout 3)
# "// empty" turns a missing/null sshPort into an empty string so that the
# fallback below actually triggers instead of keeping the literal "null".
ssh_port=$(printf '%s' "${ssh_port_result}" | jq -r '.sshPort // empty')
if [[ -z ${ssh_port} ]]; then
  echo "get ssh port error."
  ssh_port=22
fi

# IP addresses of the master nodes.
# shellcheck disable=SC2207  # one address per word is intended
master_node_iparr=($(kubectl get nodes -owide --selector=node-role.kubernetes.io/master --no-headers=true | awk '{print $6}'))

# Node account (login) information.
nodes_account_info=$(curl -s --retry 3 --retry-delay 10 --max-time 5 --connect-timeout 3 --header "Content-Type:application/json" -k --header "X-Auth-Token:${matrix_token}" \
  --request GET --url "https://localhost:${matrix_port}/matrix/rsapi/v1.0/cluster/nodes_account_info")
node_account_num=$(printf '%s' "${nodes_account_info}" | jq '. | length')
# shellcheck disable=SC2207  # one address per word is intended
localhost_iparr=($(hostname -I))
check_scripts_path=/opt/matrix/app/install/metadata/gluster/gluster/heketi/scripts
install_pack_path=/opt/matrix/app/install/packages
# Returns 0 when $1 is exactly one of this host's addresses (localhost_iparr).
# An exact comparison is required: a regex/substring test would treat
# 10.0.0.1 as local on a host whose address is 10.0.0.11.
_gfs_is_local_ip() {
  local candidate=$1 addr
  for addr in "${localhost_iparr[@]}"; do
    if [[ ${addr} == "${candidate}" ]]; then
      return 0
    fi
  done
  return 1
}

# Backs up the upgrade package $1, comments the device checks out of the
# check.sh inside it, rebuilds the archive and copies it to every other node.
# Returns 1 when the package cannot be patched or rebuilt.
_gfs_patch_and_distribute_pack() {
  local gfs_pack_name=$1
  local pack_file="${install_pack_path}/${gfs_pack_name}"
  local bak_dir="${install_pack_path}/gfs_bak"
  local dir_name check_file idx node_ip node_user node_passwd has_backup
  local -a ssh_opts=(-o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null)

  # The unpacked directory is expected to be the package name without its
  # "_x86..."/"_arm..." suffix (assumption inherited from the original logic).
  if [[ ${gfs_pack_name} == *x86* ]]; then
    dir_name=${gfs_pack_name%_x86*.*}
  else
    dir_name=${gfs_pack_name%_arm*.*}
  fi
  # If nothing was stripped, the cleanup below would delete the rebuilt
  # package itself; an empty name would delete the whole packages directory.
  if [[ -z ${dir_name} || ${dir_name} == "${gfs_pack_name}" ]]; then
    echo "cannot derive the unpack directory from package name: ${gfs_pack_name}" >&2
    return 1
  fi

  # Back up the original package once; an existing backup is never overwritten.
  if [[ -d ${bak_dir} ]]; then
    echo "备份文件已存在"
  else
    mkdir -p "${bak_dir}"
    \cp "${pack_file}" "${bak_dir}/"
  fi

  # Unpack the package; -o avoids an interactive overwrite prompt when a
  # previous run left the directory behind.
  unzip -o -q "${pack_file}" -d "${install_pack_path}/"
  check_file="${install_pack_path}/${dir_name}/metadata/gluster/heketi/scripts/check.sh"
  if [[ ! -f ${check_file} ]]; then
    echo "check script not found after unpacking: ${check_file}" >&2
    rm -rf -- "${install_pack_path:?}/${dir_name:?}"
    return 1
  fi

  # Comment out the device checks and put a short sleep in their place.
  sed -i '/isExistDevice /s/^/#/' "${check_file}"
  sed -i '/checkDiskUsed /s/^/#/' "${check_file}"
  sed -i '/#.*isExistDevice/a\ sleep 2' "${check_file}"

  # Rebuild the archive. The subshell keeps the caller's working directory.
  rm -rf -- "${pack_file}"
  if ! (cd "${install_pack_path}/" && zip -qr "${pack_file}" "${dir_name}"); then
    echo "failed to rebuild ${pack_file}; check ${bak_dir} for the backup copy" >&2
    return 1
  fi
  rm -rf -- "${install_pack_path:?}/${dir_name:?}"

  # Copy the patched package to all other nodes.
  # NOTE(review): sshpass -p exposes the password in the process list.
  for ((idx = 0; idx < node_account_num; idx++)); do
    node_ip=$(printf '%s' "${nodes_account_info}" | jq -r ".[$idx].ipList[0]")
    if _gfs_is_local_ip "${node_ip}"; then
      continue
    fi
    node_user=$(printf '%s' "${nodes_account_info}" | jq -r ".[$idx].username")
    node_passwd=$(printf '%s' "${nodes_account_info}" | jq -r ".[$idx].password")
    has_backup=$(sshpass -p "${node_passwd}" ssh -q "${ssh_opts[@]}" -p "${ssh_port}" "${node_user}@${node_ip}" \
      "sudo bash -c '[ -d ${install_pack_path}/gfs_bak ] && echo 1 || echo 0 '")
    # Mirror the local rule: take the backup only when none exists yet, so the
    # pristine package is saved before scp overwrites it.
    if [[ ${has_backup} != 1 ]]; then
      sshpass -p "${node_passwd}" ssh -q "${ssh_opts[@]}" -p "${ssh_port}" "${node_user}@${node_ip}" \
        "sudo bash -c 'mkdir -p ${install_pack_path}/gfs_bak; \cp ${install_pack_path}/${gfs_pack_name} ${install_pack_path}/gfs_bak/'"
    fi
    sshpass -p "${node_passwd}" scp -r "${ssh_opts[@]}" -P "${ssh_port}" \
      "${pack_file}" "${node_user}@${node_ip}:${install_pack_path}/"
  done
  return 0
}

#######################################
# Checks every device recorded in heketi's topology.json with isExistDevice.
# On the first device that fails the check, the upgrade package is patched so
# that its device checks are skipped, and the patched package is distributed.
# Globals (read): NAMESPACES, install_pack_path, nodes_account_info,
#   node_account_num, localhost_iparr, ssh_port
# Arguments:
#   $1 - file name of the upgrade package inside ${install_pack_path}
# Outputs: progress messages on stdout, errors on stderr
# Returns: 0 when nothing changed or the workaround was applied; 1 when the
#   topology cannot be read or the package cannot be patched.
#######################################
function checkParam() {
  local gfs_pack_name=$1
  local secret gfs_json gfs_json_len gfs_node_info node_name devs_len dev_name
  local i j

  # Read the existing GlusterFS installation info (heketi topology).
  secret=$(kubectl get secret -n "${NAMESPACES}" heketi-config-secret -o json \
    | jq -r '.data["topology.json"]')
  gfs_json=$(printf '%s' "${secret}" | base64 -d)
  gfs_json_len=$(printf '%s' "${gfs_json}" | jq '.clusters[0].nodes | length')
  if [[ ! ${gfs_json_len} =~ ^[0-9]+$ ]]; then
    # Without a readable topology "no change" must not be reported.
    echo "failed to read heketi topology from secret heketi-config-secret" >&2
    return 1
  fi

  for ((i = 0; i < gfs_json_len; i++)); do
    gfs_node_info=$(printf '%s' "${gfs_json}" | jq ".clusters[0].nodes[$i]")
    node_name=$(printf '%s' "${gfs_node_info}" | jq -r '.node.hostnames.manage[0]')
    devs_len=$(printf '%s' "${gfs_node_info}" | jq '.devices | length')
    for ((j = 0; j < ${devs_len:-0}; j++)); do
      dev_name=$(printf '%s' "${gfs_node_info}" | jq -r ".devices[$j]")
      if isExistDevice "${node_name}" "${dev_name}"; then
        continue
      fi
      # The device letter changed: apply the workaround once and stop.
      echo "gfs 发生盘符变化"
      echo "Taking evasive measures ..."
      _gfs_patch_and_distribute_pack "${gfs_pack_name}"
      return
    done
  done
  echo "gfs盘符未发生变化!无需操作"
}
# Runs command $2 on node id $1 through the matrix exec_cmd API and prints the
# reported exit code. Prints nothing when the request or its parsing fails.
_gfs_remote_exit_code() {
  local node_id=$1 remote_cmd=$2 payload
  # Build the JSON body with jq so that quotes in the values cannot break it.
  payload=$(jq -n -c --arg id "${node_id}" --arg cmd "${remote_cmd}" \
    '{nodeId: $id, command: $cmd}') || return 1
  curl -s -X POST -k \
    -H "X-Auth-Token:${TOKEN}" \
    -H "Content-Type:application/json" \
    -d "${payload}" \
    "https://${IN_VIP}:${MATRIX_SECURE_PORT}/matrix/rsapi/v1.0/exec_cmd" \
    | jq -r '.exitCode'
}

#######################################
# Verifies on the named node that a device still looks like the GlusterFS
# disk: the device node exists, it does not show up in `df -h`, and it shows
# up in `pvdisplay`.
# Globals (read): NODES_INFO, TOKEN, IN_VIP, MATRIX_SECURE_PORT
# Arguments:
#   $1 - node name, compared against nodeBaseInfo.nodeName
#   $2 - device path (every "/dev/" is stripped before use)
# Outputs: the reason of a failed check on stdout, API errors on stderr
# Returns: 0 when all checks pass or when no node is named $1;
#   1 when a check fails or an API answer is unusable.
#######################################
function isExistDevice() {
  local node_name=$1 device=$2
  local dev=${device//\/dev\//}
  local num k host_name node_id code probe

  num=$(printf '%s' "${NODES_INFO}" | jq length)
  if [[ ! ${num} =~ ^[0-9]+$ ]]; then
    echo "node list is not usable, cannot check nodename:${node_name}, device:${device}" >&2
    return 1
  fi

  for ((k = 0; k < num; k++)); do
    host_name=$(printf '%s' "${NODES_INFO}" | jq -r ".[$k].nodeBaseInfo.nodeName")
    if [[ ${host_name} != "${node_name}" ]]; then
      continue
    fi
    node_id=$(printf '%s' "${NODES_INFO}" | jq -r ".[$k].nodeId")

    # Run the three probes in order. An answer without a numeric exit code
    # means the API call failed and must not be read as a probe result:
    # an empty value would otherwise be compared as 0.
    for probe in "ls /dev/${dev} " "df -h |grep /dev/${dev} " "pvdisplay |grep /dev/${dev} "; do
      code=$(_gfs_remote_exit_code "${node_id}" "${probe}")
      if [[ ! ${code} =~ ^-?[0-9]+$ ]]; then
        echo "remote check failed, nodename:${node_name}, device:${device}, command:${probe}" >&2
        return 1
      fi
      case "${probe}" in
        ls*)
          if [[ ${code} -ne 0 ]]; then
            echo "device is not exit, nodename:$1, device:$2"
            return 1
          fi
          ;;
        df*)
          # A hit in df means the device carries a mounted filesystem.
          if [[ ${code} -eq 0 ]]; then
            echo "device is mounted, nodename:$1, device:$2"
            return 1
          fi
          ;;
        pvdisplay*)
          if [[ ${code} -ne 0 ]]; then
            echo "device type is not lvm, nodename:$1, device:$2"
            return 1
          fi
          ;;
      esac
    done
    return 0
  done
  # No node with this name: kept as "ok", matching the previous behaviour.
  return 0
}
# Runs remote command line $4 on node $2 as user $1 with password $3.
# The password is passed as one quoted word so that spaces or glob characters
# in it cannot split the sshpass command line.
_gfs_recover_ssh() {
  local node_user=$1 node_ip=$2 node_passwd=$3 remote_cmd=$4
  sshpass -p "${node_passwd}" ssh -q \
    -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \
    -p "${ssh_port}" "${node_user}@${node_ip}" "${remote_cmd}"
}

#######################################
# Undoes the workaround on every node that still has a gfs_bak directory:
# restores the original package, re-enables the device checks in check.sh,
# removes the inserted "sleep 2" lines and deletes the backup directory.
# Globals (read): nodes_account_info, node_account_num, install_pack_path,
#   check_scripts_path, ssh_port
# Arguments:
#   $1 - file name of the upgrade package
# Outputs: progress messages on stdout, errors on stderr
# Returns: 0 on success, 1 when a package could not be restored on some node
#   (that node's backup is kept in this case).
#######################################
function check_rcovery() {
  local gfs_pack_name=$1
  local bak_dir="${install_pack_path}/gfs_bak"
  local idx node_ip node_user node_passwd has_backup
  local failed=0

  echo "Recover environment ..."
  for ((idx = 0; idx < node_account_num; idx++)); do
    node_ip=$(printf '%s' "${nodes_account_info}" | jq -r ".[$idx].ipList[0]")
    node_user=$(printf '%s' "${nodes_account_info}" | jq -r ".[$idx].username")
    node_passwd=$(printf '%s' "${nodes_account_info}" | jq -r ".[$idx].password")

    # A backup directory marks a node on which the workaround was applied.
    has_backup=$(_gfs_recover_ssh "${node_user}" "${node_ip}" "${node_passwd}" \
      "sudo bash -c '[ -d ${bak_dir} ] && echo 1 || echo 0'")
    if [[ ${has_backup} != 1 ]]; then
      continue
    fi

    # Restore the original package from the backup.
    local restored=1
    if ! _gfs_recover_ssh "${node_user}" "${node_ip}" "${node_passwd}" \
      "sudo bash -c '\cp ${bak_dir}/${gfs_pack_name} ${install_pack_path}/'"; then
      echo "failed to restore ${gfs_pack_name} on ${node_ip}; backup kept in ${bak_dir}" >&2
      restored=0
      failed=1
    fi

    # Re-enable the checks in the installed metadata (best effort, as before).
    _gfs_recover_ssh "${node_user}" "${node_ip}" "${node_passwd}" \
      "sudo bash -c \"sed -i '/^#\+ *isExistDevice /s/^#\+//' ${check_scripts_path}/check.sh\""
    _gfs_recover_ssh "${node_user}" "${node_ip}" "${node_passwd}" \
      "sudo bash -c \"sed -i '/^#\+ *checkDiskUsed /s/^#\+//' ${check_scripts_path}/check.sh\""
    # NOTE(review): this removes every line containing "sleep 2", not only the
    # ones inserted by the workaround - confirm check.sh has no other such line.
    _gfs_recover_ssh "${node_user}" "${node_ip}" "${node_passwd}" \
      "sudo bash -c \"sed -i '/sleep 2/d' ${check_scripts_path}/check.sh\""

    # Delete the backup only after the package was really restored; otherwise
    # the only pristine copy would be lost.
    if [[ ${restored} -eq 1 ]]; then
      _gfs_recover_ssh "${node_user}" "${node_ip}" "${node_passwd}" \
        "sudo bash -c 'rm -rf ${bak_dir} '"
    fi
  done

  if [[ ${failed} -ne 0 ]]; then
    echo "recovery incomplete: at least one package could not be restored" >&2
    return 1
  fi
  echo "脚本执行成功"
}
#######################################
# Entry point: dispatches to the "start" or "end" phase.
# Globals (written): check_scripts_path, when $3 is given
# Arguments:
#   $1 - "start" (detect a device change and apply the workaround) or
#        "end" (undo the workaround)
#   $2 - file name of the upgrade package (required)
#   $3 - optional directory that replaces check_scripts_path
# Outputs: status messages on stdout
# Returns: exits 1 on invalid arguments; otherwise the status of the phase.
#######################################
main() {
  local action=$1
  local gfs_pack=$2
  local specPath=$3
  local usage="传参错误,请传入脚本运行参数: \$1 start 或 end, \$2 升级安装包名"

  if [[ -n ${specPath} ]]; then
    check_scripts_path=${specPath}
  fi

  if [[ -z ${gfs_pack} ]]; then
    echo "${usage}"
    exit 1
  fi

  case "${action}" in
    start)
      # Quoted so that a package name with spaces stays a single argument.
      checkParam "${gfs_pack}" || return
      echo "脚本执行成功!"
      ;;
    end)
      check_rcovery "${gfs_pack}"
      ;;
    *)
      echo "${usage}"
      exit 1
      ;;
  esac
}
# Quote "$@" so that arguments containing spaces reach main unsplit.
main "$@"
可直接联系技术人员获取。
该案例暂时没有网友评论
✖
案例意见反馈
亲~登录后才可以操作哦!
确定你的邮箱还未认证,请认证邮箱或绑定手机后进行当前操作