Rebuild mon store.db #2

Open · wants to merge 9 commits into main
106 changes: 0 additions & 106 deletions rebuild_ceph_mon_storedb.cephadm.sh

This file was deleted.

20 changes: 20 additions & 0 deletions rebuild_ceph_mon_storedb.rhcs5.md
# Notes/Ideas

## Check the status of the OSDs before we do anything

Check the status of each OSD beforehand:

```bash
$ osd6_status=$(ssh rhel86-rhcs52-admin "systemctl status ceph-6660cb98-5153-11ed-a9da-525400adb33b@osd.6.service")
```

`$?` holds the return code of the remote ssh command:

```bash
$ echo $?
0
```

and `$osd6_status` holds the output of the command:

```
$ echo "$osd6_status"
ceph-6660cb98-5153-11ed-a9da-525400adb33b@osd.6.service - Ceph osd.6 for 6660cb98-5153-11ed-a9da-525400adb33b
   Loaded: loaded (/etc/systemd/system/ceph-6660cb98-5153-11ed-a9da-525400adb33b@osd.6.service; enabled; vendor preset: disabled)
   Active: active (running) since Wed 2023-02-08 16:50:22 EST; 5min ago
  Process: 6016 ExecStopPost=/bin/rm -f /run/ceph-6660cb98-5153-11ed-a9da-525400adb33b@osd.6.service-pid /run/ceph-6660cb98-5153-11ed-a9da-525400adb33b@osd.6.service-cid (code=exited, status=0/SUCCESS)
  Process: 5941 ExecStopPost=/bin/bash /var/lib/ceph/6660cb98-5153-11ed-a9da-525400adb33b/osd.6/unit.poststop (code=exited, status=0/SUCCESS)
  Process: 5900 ExecStop=/bin/bash -c /bin/podman stop ceph-6660cb98-5153-11ed-a9da-525400adb33b-osd.6 ; bash /var/lib/ceph/6660cb98-5153-11ed-a9da-525400adb33b/osd.6/unit.stop (code=exited, status=0/SUCCESS)
  Process: 6020 ExecStart=/bin/bash /var/lib/ceph/6660cb98-5153-11ed-a9da-525400adb33b/osd.6/unit.run (code=exited, status=0/SUCCESS)
  Process: 6019 ExecStartPre=/bin/rm -f /run/ceph-6660cb98-5153-11ed-a9da-525400adb33b@osd.6.service-pid /run/ceph-6660cb98-5153-11ed-a9da-525400adb33b@osd.6.service-cid (code=exited, status=0/SUCCESS)
 Main PID: 6180 (conmon)
    Tasks: 62 (limit: 17391)
   Memory: 104.8M
   CGroup: ...
```
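A loop like this (just a sketch; the hostname, fsid, and OSD id are the ones from this lab) could record the state of every OSD unit up front:

```bash
fsid=6660cb98-5153-11ed-a9da-525400adb33b
for h in rhel86-rhcs52-admin; do            # space-delimited list of OSD hosts
    for id in 6; do                         # OSD ids on that host
        state=$(ssh "$h" "systemctl is-active ceph-${fsid}@osd.${id}.service")
        echo "$h osd.$id: $state"
    done
done
```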


## Additional troubleshooting ...

For some reason, after several test runs in my lab, I randomly started ending up
with an empty store.db directory. The same thing also happened in Steve's QL cluster. Why??
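One idea for catching this early (just a sketch, untested against the actual failure; it reuses `dirbase` and `log` from the recovery script below): verify the accumulated store after each host's pass, before the next push overwrites it:

```bash
# bail out if the accumulated mon store ever comes back empty
if [ ! -d "${dirbase}/ms/store.db" ] || [ -z "$(ls -A ${dirbase}/ms/store.db)" ]; then
    log "ERROR: ms/store.db is missing or empty after this host; stopping"
    exit 1
fi
```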
242 changes: 242 additions & 0 deletions rebuild_ceph_mon_storedb.rhcs5.sh
#!/usr/bin/bash

# Please edit this script and set this to a space-delimited list of your OSD hosts
osd_hosts=""

# RHCS 5.2 container - may need to edit this for future releases
container="registry.redhat.io/rhceph/rhceph-5-rhel8@sha256:3075e8708792ebd527ca14849b6af4a11256a3f881ab09b837d7af0f8b2102ea"

# ----------- Do not edit below this line -----------
display_usage() {
    echo "$0 usage :"
    echo "A first parameter of -x turns on shell debugging (set -x)"
    echo "Pass OSD hosts as additional parameters"
    echo "You can also edit this script and set the osd_hosts variable"
    echo "Expects that the admin node you're running this script from has root-level ssh key access to all OSD nodes"
    exit
}

log() {
    echo $(date +%F\ %T) $(hostname -s) "$1"
}

checkReturn() {
    if [ $1 -ne 0 ]; then
        log "ERROR: ${2} failed: returned ${1}"
        if [ ! -z "$3" ]; then
            exit $1
        fi
    fi
}
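# e.g. checkReturn $? "Gather FSID" 1
#   logs the failure and, because the third argument is non-empty, exits with the
#   failing command's return code; omit the third argument to log and continue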

if [ $# -eq 0 ] && [ "$osd_hosts" == "" ]; then
    display_usage
    exit
fi


# if the first option is -x, turn on set -x
if [ "$1" == "-x" ]; then
    set -x
    shift
fi

# alternatively, a list of the OSD nodes can be passed as parameters
if [ $# -gt 0 ] && [ "$osd_hosts" == "" ]; then
    for node in "$@"; do
        if [ "$osd_hosts" == "" ]; then
            osd_hosts="$node"
        else
            osd_hosts+=" $node"
        fi
    done
else
    if [ "$osd_hosts" == "" ]; then
        log "ERROR: Please edit this script and configure the osd_hosts variable with the list of all OSD hosts in your cluster."
        exit
    fi
fi
log "INFO: Gathering fsid"
fsid=$(awk '/fsid *= */ {print $NF}' /etc/ceph/ceph.conf)
checkReturn $? "Gather FSID" 1


# determine the container engine in use. Check for podman first, since the
# podman-docker package supplies a 'docker' symlink that is really podman.
container_engine=$(which podman)
if [ "$?" == "1" ]; then
    container_engine=$(which docker)
    if [ "$?" == "1" ]; then
        container_engine=$(which crio)
        checkReturn $? "Checking for container engine" 1
    fi
fi
log "INFO: Using $container_engine for containers"
log "INFO: Using $container for osd recovery container"

log "INFO: Gathering OSD list"
# CSV: host,osd[,osd]...
# if all mons are down, ceph orch is likely inaccessible
#osd_list=$(ceph orch ps | awk '/^osd\.[0-9][0-9]* / { gsub(/[^0-9]/,"",$1); osdlist[$2]=osdlist[$2]","$1 } END { hcount=asorti(osdlist,sorted); for(i=1;i<=hcount;i++) { print sorted[i] osdlist[sorted[i]]; }}')
# so sometimes we need to build this differently
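# Instead we query the LVM tags on each host: ceph-volume tags every OSD LV with
# key=value pairs, so we extract the id from a token such as "...,ceph.osd_id=6,..."
# (illustrative) and build one "host,id[,id]..." entry per host.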

osd_list=""
for h in $osd_hosts; do
remote_output=$(ssh -T $h lvs --noheadings -a -o lv_tags)
host_string="${h},"
for lvs in $remote_output; do
host_string+=$(echo $lvs|sed 's/.*,ceph\.osd_id=\([[:digit:]]*\),.*/\1/'|tr "\n" ",")
done
osd_list+=" $(echo ${host_string}|sed 's/,$//')"
done

checkReturn $? "OSD list" 1
log "INFO: Constructed the OSD list : $osd_list"
log "CONFIRM: Please confirm this is formatted correctly to continue (hostname1,1,2,3 hostname2,4,5,6 ...). Press any key to continue, CTRL-C to quit"
read ans


dirbase=/tmp/monrecovery.$(date +%F_%H-%M-%S)
log "INFO: Setting up directory structure in ${dirbase}"
for mydir in ms db db_slow logs; do
    mkdir -p "${dirbase}/${mydir}" &> /dev/null
done

log "INFO: Creating container entrypoint script osd_mon-store.db_rebuild.sh"
# osd_mon-store.db_rebuild.sh runs within the podman recovery container. log all of its output to a logfile
cat <<EOF > ${dirbase}/osd_mon-store.db_rebuild.sh
#!/bin/bash
recopath=/var/log/ceph/monrecovery
logfile="\${recopath}/logs/\$(ls /var/lib/ceph/osd)_recover.log"
echo > \$logfile

# log all output from this script to a per-OSD logfile
log() {
    echo \$(date +%F\ %T) \${HOSTNAME} "\$1" >> \$logfile
}
checkReturn() {
    if [ \$1 -ne 0 ]; then
        log "ERROR: \${2} failed: returned \${1}"
    fi
}
log "INFO: Sleep 5 seconds for filesystem stabilization"
sleep 5
log "INFO: Moving db and db_slow to ~/"
mv \${recopath}/{db,db_slow} ~/
for datadir in /var/lib/ceph/osd/ceph-*; do
    log "INFO: Checking for locks on \${datadir} before trying ceph-objectstore-tool"
    lslocks | grep \${datadir} >> \${logfile}
    count=0
    while [ "\$?" == "0" ]; do
        log "DEBUG: still locked, sleeping for 5 seconds"
        sleep 5
        ((count++))
        if [ "\${count}" == "10" ]; then
            log "ERROR: osd is still locked after 10 attempts, failing"
            exit 1
        fi
        lslocks | grep \${datadir} >> \${logfile}
    done

    log "INFO: Running update-mon-db on \${datadir}"
    CEPH_ARGS="--no_mon_config" ceph-objectstore-tool --debug --data-path \${datadir} --type bluestore --op update-mon-db --mon-store-path \${recopath}/ms &> \${recopath}/logs/osd.\$(basename \$datadir)_cot.log
    checkReturn \$? "COT update-mon-db"
    if [ -e \${datadir}/keyring ]; then
        cat \${datadir}/keyring >> \${recopath}/ms/keyring
        echo ' caps mgr = "allow profile osd"' >> \${recopath}/ms/keyring
        echo ' caps mon = "allow profile osd"' >> \${recopath}/ms/keyring
        echo ' caps osd = "allow *"' >> \${recopath}/ms/keyring
        echo >> \${recopath}/ms/keyring
    else
        log "WARNING: \${datadir} does not have a local keyring."
    fi
done
log "INFO: Moving db and db_slow from ~/"
mv ~/{db,db_slow} /var/log/ceph/monrecovery/
log "INFO: monrecovery directory listing \n \$(ls -laR /var/log/ceph/monrecovery/)"
EOF
chmod 755 ${dirbase}/osd_mon-store.db_rebuild.sh

pullData() {
    log "INFO: Pulling ${1}:/var/log/ceph/${fsid}/monrecovery/"
    rsync -aqz --delete --remove-source-files ${1}:/var/log/ceph/${fsid}/monrecovery/ ${dirbase}/
    checkReturn $? "Pulling ${1}:/var/log/ceph/${fsid}/monrecovery" 1
}

pushData() {
    log "INFO: Pushing ${1}:/var/log/ceph/${fsid}/monrecovery/"
    rsync -aqz --delete --remove-source-files ${dirbase}/ ${1}:/var/log/ceph/${fsid}/monrecovery/
    checkReturn $? "Pushing ${1}:/var/log/ceph/${fsid}/monrecovery" 1
}
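# Note: --remove-source-files means the monrecovery tree lives on exactly one side
# at a time - pushed to a host, processed there, then pulled back - so each host
# accumulates on top of the previous host's results.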


for hostosd in $osd_list; do
    osdhost=$(echo $hostosd | sed -e 's/,.*$//')
    osdids=$(echo $hostosd | sed -e 's/^[^,]*,//' -e 's/,/ /g')

    # skipping maintenance mode. Do we want to set noout or any other flags while we bounce the osds?
    pushData $osdhost

    log "INFO: Starting osd_mon-store.db_rebuild.sh loop on ${osdhost} for OSDs $osdids."

    # write a script locally, scp it over, and run it remotely. Unescaped variables expand now, on the admin node; escaped ones are evaluated on the remote host.
    cat <<EOF > ${dirbase}/recover_${osdhost}.sh
#!/bin/bash
logfile="/var/log/ceph/${fsid}/monrecovery/logs/${osdhost}.log"
log() {
    echo \$(date +%F\ %T) \${HOSTNAME} "\$1" >> \${logfile}
}
checkReturn() {
    if [ \$1 -ne 0 ]; then
        log "ERROR: \${2} failed: returned \${1}"
    fi
}

for osdid in ${osdids}; do
    log "INFO: ready to recover \${osdid}"
    # barebones container with the recovery script as the entry point, pointing at everything specific to this osd
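    # key mounts: the osd data dir appears inside the container as /var/lib/ceph/osd/ceph-<id>,
    # its config is mounted as /etc/ceph/ceph.conf, and /dev, /sys and the lvm run
    # directories are passed through so the bluestore device can be opened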
shell_cmd="${container_engine} run -i --rm --ipc=host --stop-signal=SIGTERM --authfile=/etc/ceph/podman-auth.json --net=host --entrypoint /var/log/ceph/monrecovery/osd_mon-store.db_rebuild.sh --privileged -v /var/run/ceph/${fsid}:/var/run/ceph:z -v /var/log/ceph/${fsid}:/var/log/ceph:z -v /var/lib/ceph/${fsid}/osd.\${osdid}:/var/lib/ceph/osd/ceph-\${osdid}:z -v /var/lib/ceph/${fsid}/osd.\${osdid}/config:/etc/ceph/ceph.conf:z -v /dev:/dev -v /run/udev:/run/udev -v /sys:/sys -v /run/lvm:/run/lvm -v /run/lock/lvm:/run/lock/lvm -v /var/lib/ceph/${fsid}/selinux:/sys/fs/selinux:ro -v /:/rootfs ${container}"

systemctl stop ceph-${fsid}@osd.\${osdid}.service >> \$logfile
checkReturn $? "Stopping osd \${osdid}" 1
# after stopping this osd, we loop up to 10 times waiting for the lock on the osd fsid to disappear
# otherwise the entrypoint script will fail because the osd still has a lock on the device
sleep 10
count=0
lslocks |grep /var/lib/ceph/osd/ceph-\${osdid}/fsid > /dev/null
while [ "\$?" == "0" ]; do
sleep 10
((count++))
if [ \$count -gt 10 ]; then
log "ERROR: We've looped 10 times waiting for \${osdid} to stop."
exit
fi
lslocks |grep /var/lib/ceph/osd/ceph-\${osdid}/fsid > /dev/null
done
# run the container with the osd_mon-store.db_rebuild.sh entry point
log "INFO: Starting container for osd recovery for \${osdid}"
eval \$shell_cmd

log "INFO: container finished with osd recovery for \${osdid}"
# No longer restarting the osd - documented procedure has "stop all osds".
sleep 10
done
EOF
    chmod +x ${dirbase}/recover_${osdhost}.sh

    scp -q ${dirbase}/recover_${osdhost}.sh $osdhost:/tmp/
    ssh -T ${osdhost} /tmp/recover_${osdhost}.sh
    sleep 10
    ssh -T ${osdhost} rm -f /tmp/recover_${osdhost}.sh
    pullData ${osdhost}

done
log "INFO: Done. ... document further steps. https://docs.ceph.com/en/quincy/rados/troubleshooting/troubleshooting-mon/#mon-store-recovery-using-osds"
log "INFO: ceph-monstore-tool ${dirbase} rebuild -- --keyring /path/to/admin.keyring --mon-ids alpha beta gamma"
log "INFO: Need to specify mon-ids in numerical IP address order"
log "INFO: Final Results : $(ls -laR $dirbase)"
if [ ! -e $dirbase/ms/store.db ]; then
log "ERROR: Something did not go as expected. No store.db directory generated."
fi