#!/bin/bash
# Repairs Ceph OSDs hosted on NVMe drives that have been reset (such drives
# re-appear as nvmeXn2). See usage() below for invocation details.

# Per-NVMe data, indexed by the NVMe number X (from nvmeXnY):
declare -a SLOTS          # physical PCI slot of the NVMe (from lspci)
declare -a OSDS_BY_NVME   # space-separated list of OSD ids on the NVMe
# Per-OSD data, indexed by OSD id:
declare -a IDS            # ceph-volume UUID (suffix of the "osd-block-" LV name)
declare -a LVS            # LV device path backing the OSD
declare -a LUKS           # LV UUID, used as the dm-crypt mapper name
# Map: ceph-volume UUID -> OSD id (filled from each OSD's lockbox.keyring)
declare -A OSDS_UUID

#######################################
# Print usage help for this script to stdout.
# Globals:   none
# Arguments: none
# Outputs:   help text on stdout
#######################################
usage() {
    # Heredoc instead of printf "$MSG": the message must never be treated
    # as a printf format string, and $(basename "$0") was previously the
    # broken "$basename $0" (undefined variable + full path).
    cat <<EOF
    Usage: $(basename "$0") [-h] [LIST_NVME]

    Argument:
        LIST_NVME   Reset NVME list to repair (/dev/nvmeXn2) separated by space
                    All reset NVME (nvmeXn2) if empty
EOF
}

# Show the help text and leave early when the first argument is -h.
# NOTE(review): ct_on_exit is not defined in this file — presumably a
# sourced project helper; confirm it is in scope (L52 uses plain exit).
if [[ "$1" == "-h" ]]; then
    usage
    ct_on_exit 0
fi

#######################################
# Count recent repair attempts for a disk recorded in the attempt log.
# Each log line is "<epoch_seconds> <disk>"; only entries newer than the
# 3-day look-back window are counted (previous comments wrongly said 24h).
# Arguments:
#   $1 - disk name as logged (e.g. nvme0n2)
#   $2 - optional log file path (default: /var/log/ct-repair-nvme-osd.log)
# Outputs:
#   writes the attempt count (integer) to stdout; 0 if the log is missing
#######################################
check_attempts() {
	local disk=$1
	local logfile=${2:-/var/log/ct-repair-nvme-osd.log}

	# No log yet means no attempts have been recorded.
	if [ ! -r "$logfile" ]; then
		echo 0
		return 0
	fi

	# Current time and the start of the 3-day window, in epoch seconds.
	local now window_start
	now=$(date +"%s")
	window_start=$((now - 3 * 24 * 60 * 60))

	# Count matching lines directly in awk (no extra wc -l process);
	# "count + 0" forces 0 instead of an empty string when nothing matches.
	awk -v start_time="$window_start" -v disk="$disk" '
		$1 >= start_time && $2 == disk { count++ }
		END { print count + 0 }
	' "$logfile"
}

echo "Listing disks to repair."
# Candidate devices: reset NVMe namespaces appear as nvmeXn2. Restrict to
# the devices passed on the command line, otherwise scan all block devices.
DEVS=$(lsblk "$@" | awk '{print $1}' | grep n2)

# "$DEVS" must be quoted: with several matching devices it is multi-word
# and an unquoted [ -z ] errors out ("too many arguments") instead of
# testing emptiness.
[ -z "$DEVS" ] && echo "No disk to repair" && exit 0

# Retrieve OSDs For this Host.
# For each reset namespace (nvmeXn2), collect everything needed to rebuild
# its OSDs after a PCI power-cycle: OSD ids, their ceph-volume UUIDs, the
# backing LVs and the LUKS mapper names.
for DEV in $DEVS
do
    # Rate-limit: skip disks already auto-repaired too often recently.
    log_count=$(check_attempts $DEV)
    echo "Number of auto repairs for $DEV: $log_count"
    # Skip this disk (continue, not exit) once 7 attempts were logged in
    # the look-back window.
    if [ $log_count -ge 7 ]; then
        echo "Already ran ct-repair-nvme-osd $log_count times for $DEV. Not retrying."
        continue
    fi

    # Record this attempt: "<epoch_seconds> <disk>".
    echo $(date +"%s") $DEV >> /var/log/ct-repair-nvme-osd.log
    # NVMe index X extracted from nvmeXn2.
    IDX=$(echo ${DEV} | grep -o '[[:digit:]]*' |head -1)
    SEARCH=" nvme${IDX}n1 "
    # OSD ids on this device, parsed out of "ceph device ls-by-host".
    # NOTE(review): ${SEARCH} is intentionally(?) unquoted for awk -F, so
    # word splitting strips the surrounding spaces before it becomes the
    # field separator — quoting it would change the parsing; left as-is.
    OSDS=$(ceph device ls-by-host $(hostname) | grep "${SEARCH}" | awk -F ${SEARCH} '{print $2}'| awk -F' osd.' '{for(i=2;i<=NF;i++) print $i};'|xargs)
    OSDS_BY_NVME[$IDX]=$OSDS
    # Map each OSD's ceph-volume UUID (third dotted field of the
    # "[client.osd-lockbox.<uuid>]" keyring entry) back to the OSD id.
    for OSD in $OSDS; do
        ID=$(cat /var/lib/ceph/osd/ceph-${OSD}/lockbox.keyring | grep client | awk -F'.' '{print $3}'| tr -d "]")
        OSDS_UUID[$ID]=${OSD}
    done

  # NOTE(review): IDX is recomputed here with the same command as above —
  # redundant, the value cannot have changed.
  IDX=$(echo ${DEV} | grep -o '[[:digit:]]*' |head -1)
  # Physical nvme slot (PCI address -> lspci "Physical Slot" field).
  SLOTS[$IDX]=$(cat /sys/block/${DEV}/device/address | xargs -i{} lspci -v -s {} | grep "Physical Slot"| awk -F':' '{print $2}'|xargs)
  # VG backing this NVMe.
  VG=$(pvs --noheadings -o vg_name /dev/${DEV} 2>/dev/null|xargs)
  # LV path, LV UUID (used as LUKS mapper name) and LV name per OSD.
  # IFS is set to newline so each lvs output line is one loop item.
  OLDIFS=$IFS
  IFS=$'\n'
  for line in $(lvs --noheadings -o lv_path,lv_uuid,lv_name ${VG}); do
    # ceph-volume UUID = suffix of the "osd-block-<uuid>" LV name.
    NAME=$(echo $line| awk '{print $3}'|awk -F 'block-' '{print $2}')
    OSD=${OSDS_UUID[$NAME]}
    IDS[$OSD]=$NAME
    LUKS[$OSD]=$(echo $line| awk '{print $2}')
    LVS[$OSD]=$(echo $line| awk '{print $1}')
  done
  IFS=$OLDIFS
done

# Summarize what is about to be reset before doing anything destructive.
echo
echo "Resetting NVME :"
for nvme in "${!SLOTS[@]}"; do
    echo "Nvme${nvme}:"
    echo -e "\tSLOT: ${SLOTS[$nvme]}"
    echo -e "\tOSDS:${OSDS_BY_NVME[$nvme]}"
    # Intentionally unquoted: OSDS_BY_NVME holds a space-separated list.
    for OSD in ${OSDS_BY_NVME[$nvme]}; do
        echo "osd.${OSD}"
        echo -e "\tIDS: ${IDS[$OSD]}"
        echo -e "\tLUKS: ${LUKS[$OSD]}"
        echo -e "\tLVS: ${LVS[$OSD]}"
    done
done

echo
echo Repairing...
# Iterating on each disk: power-cycle its PCI slot, clean stale device-mapper
# entries, re-open LUKS and re-enable/start the OSD services.
for nvme in ${!SLOTS[@]}; do
    # Powering off the disk (writing 0 to the slot's power node removes
    # power; run in a subshell so a redirection failure stays contained).
    echo Powering off nvme${nvme}
    (echo 0 > /sys/bus/pci/slots/${SLOTS[$nvme]}/power)
    sleep 5
    # Cleaning device mapper table: remove the stale crypt and LV mappings
    # that still reference the now-powered-off device.
    for OSD in ${OSDS_BY_NVME[$nvme]}; do
        echo Cleaning ${LUKS[$OSD]}
        dmsetup remove ${LUKS[$OSD]}
        echo Cleaning ${LVS[$OSD]}
        dmsetup remove ${LVS[$OSD]}
    done
    sleep 1

    # Powering on the disk
    echo Powering on nvme${nvme}
    (echo 1 > /sys/bus/pci/slots/${SLOTS[$nvme]}/power)
    sleep 5

    # Re-open each OSD's LUKS volume (key fetched from the ceph config-key
    # store) and bring the OSD back up.
    for OSD in ${OSDS_BY_NVME[$nvme]}; do
        echo Opening LUKS ${LUKS[$OSD]}
        (ceph config-key get dm-crypt/osd/${IDS[$OSD]}/luks | /usr/sbin/cryptsetup --key-file - --allow-discards luksOpen ${LVS[$OSD]} ${LUKS[$OSD]})

        echo Enabling osd.${OSD}
        chown -R ceph:ceph /var/lib/ceph/osd/ceph-${OSD}
        # Re-populate the OSD directory from the bluestore device metadata.
        ceph-bluestore-tool --cluster=ceph prime-osd-dir --dev /dev/mapper/${LUKS[$OSD]} --path /var/lib/ceph/osd/ceph-${OSD} --no-mon-config
        # Recreate the block symlink pointing at the reopened crypt device.
        rm -f /var/lib/ceph/osd/ceph-${OSD}/block
        ln -snf /dev/mapper/${LUKS[$OSD]} /var/lib/ceph/osd/ceph-${OSD}/block
        # Resolve the dm-N node behind the mapper symlink and fix ownership.
        DM=$(ls -l /dev/mapper/${LUKS[$OSD]} | awk -F'->' '{print $2}'| awk -F'/' '{print $2}')
        chown -R ceph:ceph /dev/${DM}
        chown -R ceph:ceph /var/lib/ceph/osd/ceph-${OSD}
        systemctl enable ceph-volume@lvm-${OSD}-${IDS[$OSD]}
        systemctl enable --runtime ceph-osd@${OSD}

        echo Restarting OSD service : ${OSD}
        systemctl start ceph-osd@${OSD}


    done
    # NOTE(review): "&>/dev/null" sends lsblk's stdout to /dev/null BEFORE
    # the pipe, so grep never sees any output and this condition is always
    # false — the failure branch is unreachable and every disk is reported
    # OK. Likely "2>/dev/null" was intended; the success/failure polarity
    # (crypt present after repair = success?) should also be confirmed.
    if (lsblk /dev/nvme${nvme}n1 &>/dev/null| grep crypt); then
        OUTPUT="${OUTPUT}\nOSD Repair Failed for NVME ${nvme}"
        OUTPUT="${OUTPUT}\nYou can retry this script !"
     else
        OUTPUT="${OUTPUT}\nOSD Repair for NVME ${nvme} OK :"
        # NOTE(review): stray trailing single quote in the appended output.
        OUTPUT="${OUTPUT}\n $(lsblk /dev/nvme${nvme}n1 --noheading)'"
    fi
done

# %b expands the \n escapes accumulated in OUTPUT without treating the
# message itself as a printf format string (it embeds lsblk output, which
# could contain % and previously would have corrupted the report).
printf '%b\n' "$OUTPUT"
