#!/usr/bin/bash
# Please edit this script and set this to a space-delimited list of your OSD hosts
osd_hosts=""
# RHCS 5.2 container - may need to edit this for future releases
container="registry.redhat.io/rhceph/rhceph-5-rhel8@sha256:3075e8708792ebd527ca14849b6af4a11256a3f881ab09b837d7af0f8b2102ea"
# ----------- Do not edit below this line -----------
display_usage() {
    echo "Usage: $0 [-x] [osd_host ...]"
    echo "A first parameter of -x will toggle on debugging"
    echo "Pass OSD hosts as additional parameters"
    echo "You can also edit this script and set the osd_hosts variable"
    echo "Expects that the admin node you're running this script from has root-level ssh key access to all OSD nodes"
    exit
}
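# log MESSAGE : print MESSAGE prefixed with a timestamp and the short hostname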
log() {
    echo $(date +%F\ %T) $(hostname -s) "$1"
}
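# checkReturn RC DESCRIPTION [FATAL] : log an error when RC is non-zero; exit with RC when a third argument is given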
checkReturn() {
    if [ $1 -ne 0 ]; then
        log "ERROR: ${2} failed: returned ${1}"
        if [ ! -z "$3" ]; then
            exit $1
        fi
    fi
}
if [ $# -eq 0 ] && [ "$osd_hosts" == "" ]; then
    display_usage
    exit
fi
# if first option is -x, we turn on set -x
if [ "$1" == "-x" ]; then
set -x
shift
fi
# or you can also pass a list of the osd nodes as parameters
if [ $# -gt 0 ] && [ "$osd_hosts" == "" ]; then
    # osd_hosts is empty here, so the positional parameters become the host list
    osd_hosts="$*"
elif [ "$osd_hosts" == "" ]; then
    log "ERROR: Please edit this script and configure the osd_hosts variable with the list of all OSD hosts in your cluster."
    exit 1
fi
log "INFO: Gathering fsid"
fsid=$(awk '/fsid *= */ {print $NF}' /etc/ceph/ceph.conf)
checkReturn $? "Gather FSID" 1
# determine the container engine in use. Note that the podman-docker package supplies a 'docker' symlink to podman,
# so 'docker' may resolve to podman on some hosts.
# defaults to checking for podman first
container_engine=$(which podman)
if [ "$?" == "1" ]; then
    container_engine=$(which docker)
    if [ "$?" == "1" ]; then
        container_engine=$(which crio)
        checkReturn $? "Checking for container engine" 1
    fi
fi
log "INFO: Using $container_engine for containers"
log "INFO: Using $container for osd recovery container"
log "INFO: Gathering OSD list"
# Space separated CSV: host,osdid[,osdid]... host2,osdid[,osdid]...
# Example:
# osd_list="host1,0,1,2 host2,3,4,5"
#
# osd_list=$(ceph orch ps | awk '/^osd\.[0-9][0-9]* / { gsub(/[^0-9]/,"",$1); osdlist[$2]=osdlist[$2]","$1 } END { hcount=asorti(osdlist,sorted); for(i=1;i<=hcount;i++) { print sorted[i] osdlist[sorted[i]]; }}')
# if all mons are down, ceph orch is likely inaccessible
# so we likely need to build this differently
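# instead, discover the OSD ids from the ceph.osd_id lv_tags on each host's LVM volumes over ssh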
osd_list=""
for h in $osd_hosts; do
remote_output=$(ssh -T $h lvs --noheadings -a -o lv_tags)
host_string="${h},"
for lvs in $remote_output; do
host_string+=$(echo $lvs|sed 's/.*,ceph\.osd_id=\([[:digit:]]*\),.*/\1/'|tr "\n" ",")
done
osd_list+=" $(echo ${host_string}|sed 's/,$//')"
done
checkReturn $? "OSD list" 1
log "INFO: Constructed the OSD list : $osd_list"
log "CONFIRM: Please confirm this is formatted correctly to continue (hostname1,0,1,2 hostname2,3,4,5 ...). Press any key to continue, CTRL-C to quit"
read ans
dirbase=/tmp/monrecovery.$(date +%F_%H-%M-%S)
log "INFO: Setting up directory structure in ${dirbase}"
for mydir in ms db db_slow logs; do
    mkdir -p "${dirbase}/${mydir}" &> /dev/null
done
log "INFO: Creating container entrypoint script osd_mon-store.db_rebuild.sh"
# osd_mon-store.db_rebuild.sh runs inside the recovery container; all of its output is logged to a per-OSD logfile
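# within this heredoc, unescaped variables expand now on the admin node; escaped (\$) variables expand when the script runs in the container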
cat <<EOF > ${dirbase}/osd_mon-store.db_rebuild.sh
#!/bin/bash
recopath=/var/log/ceph/monrecovery
logfile="\${recopath}/logs/\$(ls /var/lib/ceph/osd)_recover.log"
echo > \$logfile
# log all output from this script to the per-OSD logfile
log() {
    echo \$(date +%F\ %T) \${HOSTNAME} "\$1" >> \$logfile
}
checkReturn() {
    if [ \$1 -ne 0 ]; then
        log "ERROR: \${2} failed: returned \${1}"
    fi
}
log "INFO: Sleep 5 seconds for filesystem stabilization"
sleep 5
log "INFO: Moving db and db_slow to ~/"
mv \${recopath}/{db,db_slow} ~/
for datadir in /var/lib/ceph/osd/ceph-*; do
    log "INFO: Checking for locks on \${datadir} before trying ceph-objectstore-tool"
    count=0
    lslocks | grep \${datadir} >> \${logfile}
    while [ "\$?" == "0" ]; do
        log "DEBUG: still locked, sleeping for 5 seconds"
        sleep 5
        ((count++))
        if [ "\${count}" == "10" ]; then
            log "ERROR: osd is still locked after 10 attempts, failing"
            exit 1
        fi
        lslocks | grep \${datadir} >> \${logfile}
    done
    log "INFO: Running update-mon-db on \${datadir}"
    CEPH_ARGS="--no_mon_config" ceph-objectstore-tool --debug --data-path \${datadir} --type bluestore --op update-mon-db --mon-store-path \${recopath}/ms &> \${recopath}/logs/osd.\$(basename \$datadir)_cot.log
    checkReturn \$? "COT update-mon-db"
    if [ -e \${datadir}/keyring ]; then
        cat \${datadir}/keyring >> \${recopath}/ms/keyring
        echo ' caps mgr = "allow profile osd"' >> \${recopath}/ms/keyring
        echo ' caps mon = "allow profile osd"' >> \${recopath}/ms/keyring
        echo ' caps osd = "allow *"' >> \${recopath}/ms/keyring
        echo >> \${recopath}/ms/keyring
    else
        log "WARNING: \${datadir} does not have a local keyring."
    fi
done
log "INFO: Moving db and db_slow from ~/"
mv ~/{db,db_slow} /var/log/ceph/monrecovery/
log "INFO: monrecovery directory listing \n \$(ls -laR /var/log/ceph/monrecovery/)"
EOF
chmod 755 ${dirbase}/osd_mon-store.db_rebuild.sh
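# pushData/pullData HOST : rsync the monrecovery working tree to/from HOST; --remove-source-files moves rather
# than copies, so the accumulated mon store state travels from host to host as the main loop below progresses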
pullData() {
    log "INFO: Pulling ${1}:/var/log/ceph/${fsid}/monrecovery/"
    rsync -aqz --delete --remove-source-files ${1}:/var/log/ceph/${fsid}/monrecovery/ ${dirbase}/
    checkReturn $? "Pulling ${1}:/var/log/ceph/${fsid}/monrecovery" 1
}
pushData() {
    log "INFO: Pushing ${1}:/var/log/ceph/${fsid}/monrecovery/"
    rsync -aqz --delete --remove-source-files ${dirbase}/ ${1}:/var/log/ceph/${fsid}/monrecovery/
    checkReturn $? "Pushing ${1}:/var/log/ceph/${fsid}/monrecovery" 1
}
for hostosd in $osd_list; do
    osdhost=$(echo $hostosd | sed -e 's/,.*$//')
    osdids=$(echo $hostosd | sed -e 's/^[^,]*,//' -e 's/,/ /g')
    # skipping maintenance mode. Do we want to set noout or any other flags while we bounce the OSDs?
    pushData $osdhost
    log "INFO: Starting osd_mon-store.db_rebuild.sh loop on ${osdhost} for OSDs ${osdids}."
    # build the script locally, scp it over, and run it remotely. Unescaped variables expand now; escaped ones expand on the remote host
    cat <<EOF > ${dirbase}/recover_${osdhost}.sh
#!/bin/bash
logfile="/var/log/ceph/${fsid}/monrecovery/logs/${osdhost}.log"
log() {
    echo \$(date +%F\ %T) \${HOSTNAME} "\$1" >> \${logfile}
}
# checkReturn honors an optional third argument to make the failure fatal, matching the admin-node version
checkReturn() {
    if [ \$1 -ne 0 ]; then
        log "ERROR: \${2} failed: returned \${1}"
        if [ ! -z "\$3" ]; then
            exit \$1
        fi
    fi
}
for osdid in ${osdids}; do
    log "INFO: ready to recover \${osdid}"
    # barebones container with the recovery script as the entry point, pointing at all of the specifics for this osd
    shell_cmd="${container_engine} run -i --rm --ipc=host --stop-signal=SIGTERM --authfile=/etc/ceph/podman-auth.json --net=host --entrypoint /var/log/ceph/monrecovery/osd_mon-store.db_rebuild.sh --privileged -v /var/run/ceph/${fsid}:/var/run/ceph:z -v /var/log/ceph/${fsid}:/var/log/ceph:z -v /var/lib/ceph/${fsid}/osd.\${osdid}:/var/lib/ceph/osd/ceph-\${osdid}:z -v /var/lib/ceph/${fsid}/osd.\${osdid}/config:/etc/ceph/ceph.conf:z -v /dev:/dev -v /run/udev:/run/udev -v /sys:/sys -v /run/lvm:/run/lvm -v /run/lock/lvm:/run/lock/lvm -v /var/lib/ceph/${fsid}/selinux:/sys/fs/selinux:ro -v /:/rootfs ${container}"
    systemctl stop ceph-${fsid}@osd.\${osdid}.service >> \$logfile
    checkReturn \$? "Stopping osd \${osdid}" 1
    # after stopping this osd, loop up to 10 times waiting for the lock on the osd fsid to disappear;
    # otherwise the entrypoint script will fail because the osd still holds a lock on the device
    sleep 10
    count=0
    lslocks | grep /var/lib/ceph/osd/ceph-\${osdid}/fsid > /dev/null
    while [ "\$?" == "0" ]; do
        sleep 10
        ((count++))
        if [ \$count -gt 10 ]; then
            log "ERROR: We've looped 10 times waiting for \${osdid} to stop."
            exit 1
        fi
        lslocks | grep /var/lib/ceph/osd/ceph-\${osdid}/fsid > /dev/null
    done
    # run the container with the osd_mon-store.db_rebuild.sh entry point
    log "INFO: Starting container for osd recovery for \${osdid}"
    eval \$shell_cmd
    log "INFO: container finished with osd recovery for \${osdid}"
    # No longer restarting the osd - the documented procedure has "stop all osds".
    sleep 10
done
EOF
    chmod +x ${dirbase}/recover_${osdhost}.sh
    scp -q ${dirbase}/recover_${osdhost}.sh ${osdhost}:/tmp/
    ssh -T ${osdhost} /tmp/recover_${osdhost}.sh
    sleep 10
    ssh -T ${osdhost} rm -f /tmp/recover_${osdhost}.sh
    pullData ${osdhost}
done
log "INFO: Done. ... document further steps. https://docs.ceph.com/en/quincy/rados/troubleshooting/troubleshooting-mon/#mon-store-recovery-using-osds"
log "INFO: ceph-monstore-tool ${dirbase} rebuild -- --keyring /path/to/admin.keyring --mon-ids alpha beta gamma"
log "INFO: Need to specify mon-ids in numerical IP address order"
log "INFO: Final Results : $(ls -laR $dirbase)"
if [ ! -e $dirbase/ms/store.db ]; then
    log "ERROR: Something did not go as expected. No store.db directory was generated."
fi