#!/bin/sh

## -------------------------------------------------------------------
##
## riak-cs-debug: Gather info from a node for troubleshooting.
##
## Copyright (c) 2014 Basho Technologies, Inc.,
##               2021 TI Tokyo.  All Rights Reserved.
##
## This file is provided to you under the Apache License,
## Version 2.0 (the "License"); you may not use this file
## except in compliance with the License.  You may obtain
## a copy of the License at
##
##   http://www.apache.org/licenses/LICENSE-2.0
##
## Unless required by applicable law or agreed to in writing,
## software distributed under the License is distributed on an
## "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
## KIND, either express or implied.  See the License for the
## specific language governing permissions and limitations
## under the License.
##
## -------------------------------------------------------------------

# Revert the effect of `set -e`, set by relx-generated code when we are
# invoked via `riak-cs debug` (otherwise, non-zero exits, which occur
# normally, will silently cause riak-cs-debug to terminate).
set +e

# /bin/sh on Solaris is not a POSIX compatible shell, but /usr/bin/ksh is.
# This stanza is the only part of the script that the Solaris /bin/sh has to
# run itself, so it deliberately keeps to backticks and `[ ... -a ... ]`
# instead of $(...) before handing over to ksh.
if [ "`uname -s`" = 'SunOS' -a "${POSIX_SHELL}" != "true" ]; then
    # POSIX_SHELL marks the re-executed process so it does not exec again.
    POSIX_SHELL="true"
    export POSIX_SHELL
    # To support 'whoami' add /usr/ucb to path
    PATH=/usr/ucb:$PATH
    export PATH
    # Quote $0 so a script path containing whitespace survives the re-exec.
    exec /usr/bin/ksh "$0" "$@"
fi
unset POSIX_SHELL # clear it so if we invoke other scripts, they run as ksh as well

###
### Set up variables
###

# Succeed (status 0) when $1 does not begin with "/", i.e. it is a relative
# path; an empty string counts as relative. Fail (status 1) when $1 is
# absolute.
is_relative() {
    case "$1" in
        /*) return 1 ;;
        *)  return 0 ;;
    esac
}

# Installation root. The first assignment carries the packaged value; the
# second only falls back to "parent of the directory holding this script"
# when that value is empty. Both command substitutions are quoted so a
# script path containing whitespace cannot break the `cd`.
PLATFORM_BASE_DIR=/usr/lib64/riak-cs
PLATFORM_BASE_DIR=${PLATFORM_BASE_DIR:-$(cd "$(dirname "$0")/.." && pwd -P)}
cs_base_dir=$PLATFORM_BASE_DIR

# Each platform directory below is used as-is when it is absolute and is
# anchored under PLATFORM_BASE_DIR when it is relative.

# Executables (riak-cs, riak-cs-admin, ...)
if is_relative "/usr/lib64/riak-cs/bin"; then
    cs_bin_dir="$PLATFORM_BASE_DIR//usr/lib64/riak-cs/bin"
else
    cs_bin_dir="/usr/lib64/riak-cs/bin"
fi

# Configuration files
if is_relative "/etc/riak-cs"; then
    cs_etc_dir="$PLATFORM_BASE_DIR//etc/riak-cs"
else
    cs_etc_dir="/etc/riak-cs"
fi

# Log files
if is_relative "/var/log/riak-cs"; then
    cs_log_dir="$PLATFORM_BASE_DIR//var/log/riak-cs"
else
    cs_log_dir="/var/log/riak-cs"
fi

# Libraries; lib.sh is sourced from here
if is_relative "/usr/lib64/riak-cs/lib"; then
    cs_lib_dir="$PLATFORM_BASE_DIR//usr/lib64/riak-cs/lib"
else
    cs_lib_dir="/usr/lib64/riak-cs/lib"
fi

# Generated/runtime data
if is_relative "/var/lib/riak-cs"; then
    cs_gen_dir="$PLATFORM_BASE_DIR//var/lib/riak-cs"
else
    cs_gen_dir="/var/lib/riak-cs"
fi

# Directory searched later for generated vm.*.args files and copied into
# the archive's config section.
cuttlefish_conf_dir=${cs_gen_dir}/generated.conf

###
### Function declarations
###

# Load the shared helper library from the lib directory. The path is quoted
# so a lib directory containing whitespace is still sourced as one file.
# NOTE(review): make_app_epaths, called further down, is not defined in this
# file and is presumably provided by lib.sh - confirm.
. "$cs_lib_dir/lib.sh"

# Like echo, but the arguments are written to stderr instead of stdout.
echoerr () {
    echo "$@" >&2
}

# Make sure directory $1 exists, creating any missing parents.
# If it cannot be created, print an error plus the path on stderr and
# terminate the whole script with status 1.
mkdir_or_die () {
    # Already there: nothing to create.
    if [ -d "$1" ]; then
        return
    fi

    mkdir -p "$1" || {
        echoerr "Error creating riak-cs-debug directories. Aborting."
        echoerr "$1"
        exit 1
    }
}

# dump OUTFILE COMMAND [ARG...]
#
# Run COMMAND with its arguments, appending stdout and stderr to OUTFILE in
# the current directory, and record the command line and its exit status in
# .info/OUTFILE (the .info directory must already exist).
#
# Globals:  verbose_output (read); out, retval (written).
# Output:   progress on stderr - '.' on success, 'E' on failure, or a full
#           failure message when verbose_output is greater than 0.
# Returns:  0 without running anything when COMMAND is not available,
#           otherwise COMMAND's exit status.
dump () {
    # first argument is the filename to hold the command output
    out=$1
    shift

    # next argument is the base command to execute. skip dump if not available.
    command -v "$1" >/dev/null 2>&1 || return 0

    # execute the rest of the arguments as the command. "$@" keeps every
    # argument exactly as the caller passed it; an unquoted $* would re-split
    # arguments on whitespace and expand glob characters inside them.
    "$@" >> "$out" 2>&1

    # grab the return value
    retval=$?

    # put the command and the retval in the .info/$out file to aid automation.
    # note: this will miss some escaping for, e.g., find, but it's not critical.
    # get command that was run with `head -1 .info/$out`
    # get return value from command with `tail -1 .info/$out`
    echo "$*" > .info/"$out"
    echo $retval >> .info/"$out"

    if [ 0 -eq $retval ]; then
        printf '.' 1>&2
    else
        # Treat an unset verbosity level as "not verbose".
        if [ "${verbose_output:-0}" -gt 0 ]; then
            echoerr 'Command '\'"$*"\'' failed. Continuing.'
        else
            printf 'E' 1>&2
        fi
    fi

    return $retval
}

# Print the help text on stdout and terminate the script. The exit status is
# that of the `cat` which printed the text. The here-document delimiter is
# quoted, so the backticks and quotes inside the text are printed literally.
usage () {
cat << 'EOF'
Usage: riak-cs-debug [-c] [-l] [-r] [-s] [-e] [-v] [FILENAME | -]

-h, --help           Print this help text and exit.
-c, --cfgs           Gather Riak configuration information (includes everything
                     in platform_etc_dir).
                     Please see the Privacy Note below.
    --ssl-certs      Do not skip the capture of *potentially private* SSL
                     certificates.
                     By default files in the platform_etc_dir with the following
                     extensions are not included in the riak-cs-debug archive:
                     .pfx, .p12, .csr, .sst, .sto, .stl, .pem, .key.
                     Please see the Privacy Note below.
-l, --logs           Gather Riak CS logs.
-r, --riakcmds       Gather Riak CS information and command output.
-s, --syscmds        Gather general system commands.
-e, --extracmds      Gather extra command output. These commands are too intense
                     to run on all nodes in a cluster but appropriate for a
                     single node.
-v, --verbose        Print verbose failure messages to stderr
    --log-mtime      Determine last modified time as n*24 hours, which
                     riak-cs-debug gathers newer logs than the time.
                     (default: 14)
    --skip-accesslog Exclude access.log from logs riak-cs-debug gathers.
FILENAME             Output filename for the tar.gz archive. Use - to specify stdout.

Defaults: Get configs, logs, riak-cs commands, and system commands.
          Output in current directory to NODE_NAME-riak-cs-debug.tar.gz or
          HOSTNAME-riak-cs-debug.tar.gz if NODE_NAME cannot be found.

Privacy Note: When the -c flag is set (included in the defaults) every file in
              the directory specified in Riak's `platform_etc_dir` will be
              copied into the generated debug archive with the exceptions noted
              above. If there are additional files that you wish to keep out of
              the archive, the environment variable RIAK_EXCLUDE may be filled
              with a space separated list of file patterns that will be passed
              into a `find -name`. Any matches will be excluded from the
              generated archive.
              E.g. `RIAK_EXCLUDE="'*.key' mySecretFile.txt" riak-cs-debug`
EOF
exit
}

# Remember the invocation directory (the default output location is built
# from it later), then work from the platform base directory.
curdir=$(pwd)
cd "$PLATFORM_BASE_DIR"

# Keep every riak-cs-chkconfig output line that does not contain "OK". The
# result is deliberately echoed unquoted so it collapses onto one line, and
# the second space-separated word is taken as the app config path.
gen_result=$("/usr/lib64/riak-cs/bin"/riak-cs-chkconfig | grep -v "OK")
app_config=$(echo $gen_result | cut -d' ' -f 2)

# Without an app config there is nothing to inspect.
[ -n "$app_config" ] || {
    echoerr "App configs missing in /usr/lib64/riak-cs/generated.conf. Aborting."
    exit 1
}

# make a flat, searchable version of the app.config
riak_epaths=$(make_app_epaths "${app_config}")

# Pick the most recently modified generated vm.*.args file, if any.
if [ -d "${cuttlefish_conf_dir}" ]; then
    vmargs=$(ls -t ${cuttlefish_conf_dir}/vm.*.args 2>/dev/null | head -1)
fi


###
### Set defaults for items to gather in the report
###

# Section switches: 1 means "gather this section". All start off; option
# parsing turns individual ones on.
get_cfgs=0 get_ssl_certs=0 get_logs=0
get_riakcmds=0 get_syscmds=0 get_extracmds=0

# Modifiers: drop access logs from the gathered logs / verbosity level.
skip_accesslog=0 verbose_output=0

# Default for --log-mtime (the usage text describes it as n*24 hours).
log_mtime=14

###
### Parse options
###

# debug_parse_options ARG...
#
# Translate the command-line arguments into the get_*, skip_accesslog,
# verbose_output, log_mtime and outfile globals. Parsing stops at the first
# empty argument. Any usage error is reported on stderr and terminates the
# script with status 1; -h/--help prints the usage text and exits.
debug_parse_options () {
    while [ -n "$1" ]; do
        case "$1" in
            -h|--help)
                usage
                ;;
            -c|--cfgs)
                get_cfgs=1
                ;;
            --ssl-certs)
                get_ssl_certs=1
                ;;
            -l|--logs)
                get_logs=1
                ;;
            -r|--riakcmds)
                get_riakcmds=1
                ;;
            -s|--syscmds)
                get_syscmds=1
                ;;
            -e|--extracmds)
                get_extracmds=1
                ;;
            --log-mtime)
                # The value is the next argument. Without this check a
                # trailing --log-mtime would leave log_mtime empty and make
                # the shift at the bottom of the loop run out of arguments.
                if [ $# -lt 2 ] || [ -z "$2" ]; then
                    echoerr "Option $1 requires a value. Aborting."
                    echoerr "See 'riak-cs-debug -h' and manpage for help."
                    exit 1
                fi
                shift
                log_mtime=$1
                ;;
            --skip-accesslog)
                skip_accesslog=1
                ;;
            -v|--verbose)
                # May be repeated; each occurrence raises the level by one.
                verbose_output=$(( $verbose_output + 1 ))
                ;;
            -)
                # If truly specifying stdout as the output file, it should be last
                if [ $# -gt 1 ]; then
                    echoerr "Trailing options following filename $1. Aborting."
                    echoerr "See 'riak-cs-debug -h' and manpage for help."
                    exit 1
                fi

                outfile="-"
                ;;
            *)
                # Shouldn't get here until the last option, the output filename.
                if [ '-' = "$outfile" ]; then
                    echoerr "Filename $1 given but stdout, -, already specified."
                    echoerr "Aborting. See 'riak-cs-debug -h' and manpage for help."
                    exit 1
                fi

                # The filename shouldn't start with a '-'. The single character '-'
                # is handled as a special case above. A case pattern is used so
                # that a first character such as '*' is never glob-expanded.
                case "$1" in
                    -*)
                        echoerr "Unrecognized option $1. Aborting"
                        echoerr "See 'riak-cs-debug -h' and manpage for help."
                        exit 1
                        ;;
                esac

                if [ $# -gt 1 ]; then
                    echoerr "Trailing options following filename $1. Aborting."
                    echoerr "See 'riak-cs-debug -h' and manpage for help."
                    exit 1
                fi

                outfile="$1"
                ;;
        esac
        shift
    done
}

debug_parse_options "$@"

###
### Finish setting up variables and overrides
###

# A flag that was never initialised counts as 0, so the arithmetic below
# cannot fail on an empty operand.
if [ 0 -eq $(( ${get_cfgs:-0} + ${get_logs:-0} + ${get_riakcmds:-0} + ${get_syscmds:-0} + ${get_extracmds:-0} )) ]; then
    # Nothing specific was requested, so get everything except extracmds
    get_cfgs=1
    get_logs=1
    get_riakcmds=1
    get_syscmds=1
    get_extracmds=0
fi

# --ssl-certs only changes what the configuration section captures, so it
# has no effect unless -c/--cfgs is active. Two separate tests joined with
# && are used instead of the obsolescent `-a` operator.
if [ 0 -eq "${get_cfgs:-0}" ] && [ 1 -eq "${get_ssl_certs:-0}" ]; then
    echoerr "The --ssl-certs option is meaningless without --cfgs. Ignoring."
    get_ssl_certs=0
fi

# The base, bin and etc directories are only checked when a section other
# than the system commands was requested. Each one that is set must be an
# absolute path; otherwise report it and stop.
if [ 0 -ne $(( $get_cfgs + $get_logs + $get_riakcmds + $get_extracmds )) ]; then

    if [ -n "$cs_base_dir" ] && is_relative "$cs_base_dir"; then
        echoerr "Riak base directory should be an absolute path."
        echoerr "$cs_base_dir"
        echoerr "See 'riak-cs-debug -h' and manpage for help."
        exit 1
    fi

    if [ -n "$cs_bin_dir" ] && is_relative "$cs_bin_dir"; then
        echoerr "Riak bin directory should be an absolute path."
        echoerr "$cs_bin_dir"
        echoerr "See 'riak-cs-debug -h' and manpage for help."
        exit 1
    fi

    if [ -n "$cs_etc_dir" ] && is_relative "$cs_etc_dir"; then
        echoerr "Riak etc directory should be an absolute path."
        echoerr "$cs_etc_dir"
        echoerr "See 'riak-cs-debug -h' and manpage for help."
        exit 1
    fi
fi


# When a vm.args file was found, take the second space-separated field of
# its line beginning with -name or -sname as the node name.
[ -f "${vmargs}" ] &&
    node_name="$(egrep '^\-s?name' "${vmargs}" 2>/dev/null | cut -d ' ' -f 2)"

# Couldn't figure out node name. Fallback to hostname.
[ -n "$node_name" ] || node_name="$(hostname)"

# The report is staged under $TMPDIR, or /tmp when TMPDIR is unset or empty.
start_dir=${TMPDIR:-/tmp}

# Strip any trailing slash from TMPDIR. Parameter expansion is used so a
# TMPDIR containing whitespace or glob characters is left intact, which an
# unquoted `echo $start_dir | sed` would not guarantee.
start_dir=${start_dir%/}

debug_dir="${node_name}-riak-cs-debug"

# Refuse to reuse a staging directory that already exists.
if [ -d "${start_dir}/${debug_dir}" ]; then
    echoerr "Temporary directory already exists. Aborting."
    echoerr "${start_dir}"/"${debug_dir}"
    exit 1
fi

if [ -z "$outfile" ]; then
    # If output file not specified, output to the default
    outfile="${curdir}"/"${debug_dir}".tar.gz
fi

# Never overwrite an existing archive ('-' means stdout, so nothing to check).
if [ '-' != "$outfile" ] && [ -f "$outfile" ]; then
    echoerr "Output file already exists. Aborting."
    echoerr "$outfile"
    exit 1
fi

###
### Gather system commands
###

# Runs only when the system-command section is enabled. Every `dump NAME CMD...`
# line below stores CMD's output in a file called NAME inside the staging
# commands/ directory; dump itself skips commands that are not installed.
if [ 1 -eq $get_syscmds ]; then
    mkdir_or_die "${start_dir}"/"${debug_dir}"/commands/.info
    cd "${start_dir}"/"${debug_dir}"/commands

    # System info
    dump date date
    dump w w
    dump last last
    dump hostname hostname
    dump uname uname -a
    dump lsb_release lsb_release
    dump ps ps aux
    dump vmstat vmstat 1 5
    dump free free -m
    dump df df
    dump df_i df -i
    dump dmesg dmesg
    dump mount mount
    dump sysctl sysctl -a
    dump rpm rpm -qa
    dump dpkg dpkg -l
    dump pkg_info pkg_info
    dump sestatus sestatus -v
    dump ifconfig ifconfig -a
    dump netstat_i netstat -i
    dump netstat_an netstat -an
    dump netstat_rn netstat -rn
    dump pfctl_rules pfctl -s rules
    dump pfctl_nat pfctl -s nat
    dump zfs_list zfs list
    dump zpool_list zpool list

    # If swapctl exists, prefer it over swapon
    if [ -n "`command -v swapctl`" ]; then
        dump swapctl swapctl -s
    else
        dump swapon swapon -s
    fi

    # Record the read-ahead setting (blockdev --getra) for the first field of
    # every `mount` line that starts with "/", leaving out lines mentioning
    # /dev/shm or /var/lib/snapd. Slashes in that field are turned into
    # underscores to form the output file name.
    BLOCKDEV=/sbin/blockdev
    if [ -e $BLOCKDEV ]; then
        for mount_point in `mount | grep -E '^/' | grep -v /dev/shm | grep -v /var/lib/snapd | awk '{print $1}'`; do
            flat_point=`echo $mount_point | sed 's:/:_:g'`
            dump blockdev.$flat_point $BLOCKDEV --getra $mount_point
        done
    else
        # Leave a marker file so the absence of blockdev is visible in the archive.
        dump blockdev._ echo $BLOCKDEV is not available
    fi

    # Running iptables commands if the module is not loaded can automatically
    # load them. This is rarely desired and can even cause connectivity
    # problems if, e.g., nf_conntrack gets autoloaded and autoenabled.
    if [ -n "`command -v lsmod`" ]; then
        if [ -n "`lsmod 2>/dev/null | awk '{print $1}' | grep iptable_filter`" ]; then
            dump iptables_rules iptables -n -L
        else
            dump iptables_rules echo "iptables module not loaded"
        fi

        if [ -n "`lsmod 2>/dev/null | awk '{print $1}' | grep nf_conntrack`" ]; then
            dump iptables_nat iptables -t nat -n -L
        else
            dump iptables_nat echo "nf_conntrack module not loaded"
        fi
    fi

    # Choose the iostat flavour from what /proc looks like.
    if [ -f /proc/diskstats ]; then
        # Linux iostat
        dump iostat_linux iostat -mx 1 5
    elif [ -d /proc ]; then
        # No diskstats, but proc, probably Solaris or SmartOS
        dump iostat_smartos iostat -xnz 1 5
    else
        # BSD style iostat
        dump iostat_bsd iostat -dIw 1 -c 5
    fi

    # Dump files
    [ -f /etc/release ] && dump release cat /etc/release
    [ -f /etc/redhat-release ] && dump redhat_release cat /etc/redhat-release
    [ -f /etc/debian_version ] && dump debian_version cat /etc/debian_version
    [ -f /etc/security/limits.conf ] && dump limits.conf cat /etc/security/limits.conf
    [ -f /var/log/messages ] && dump messages cat /var/log/messages
    [ -f /var/log/syslog ] && dump syslog cat /var/log/syslog
    [ -f /var/log/kern.log ] && dump kern.log cat /var/log/kern.log
    [ -f /proc/diskstats ] && dump diskstats cat /proc/diskstats
    [ -f /proc/cpuinfo ] && dump cpuinfo cat /proc/cpuinfo
    [ -f /proc/meminfo ] && dump meminfo cat /proc/meminfo

    # Dump directories and finds
    # (each find prints the entry's path and then the contents of the named file)
    [ -d /dev/disk/by-id ] && dump disk_by_id ls -l /dev/disk/by-id
    [ -d /sys/block ] && dump schedulers find /sys/block/ -type l -print -exec cat {}/queue/scheduler \;
    [ -d /proc/net/bonding ] && dump bonding find /proc/net/bonding/ -type f -print -exec cat {} \;
    [ -d /sys/class/net ] && dump rx_crc_errors find /sys/class/net/ -type l -print -exec cat {}/statistics/rx_crc_errors \;

    # A bit more complicated, but let's get all of limits.d if it's there
    if [ -d /etc/security/limits.d ]; then
        # check to ensure there is at least something to get
        ls -1 /etc/security/limits.d | grep -q ".conf"
        if [ 0 -eq $? ]; then
            mkdir_or_die "${start_dir}"/"${debug_dir}"/commands/limits.d

            # Mirror the directory, only copying files that match the pattern
            # In the inline script, $0 is the destination directory passed
            # after the script text and $1 is the file found; ${1%/*} is the
            # file's directory part, recreated under the destination first.
            cd /etc/security/limits.d
            find . -type f -name '*.conf' -exec sh -c '
                mkdir -p "$0/${1%/*}";
                cp "$1" "$0/$1"
                ' "${start_dir}"/"${debug_dir}"/commands/limits.d {} \;
        fi
    fi
fi

###
### Gather Riak commands and info
###

# Runs only when the Riak CS command section is enabled. Output of each
# command lands in the staging commands/ directory via dump.
if [ 1 -eq $get_riakcmds ]; then
    mkdir_or_die "${start_dir}"/"${debug_dir}"/commands/.info
    cd "${start_dir}"/"${debug_dir}"/commands

    dump riak_cs_ping riak-cs ping
    dump riak_cs_version riak-cs admin version
    dump riak_cs_status riak-cs admin status
    dump riak_cs_gc_status riak-cs admin gc status
    dump riak_cs_storage_status riak-cs admin storage status

    # Only query supercluster state when riak-cs.conf mentions it. The path
    # is quoted so an etc directory containing whitespace is read as one file.
    if grep -q supercluster "$cs_etc_dir/riak-cs.conf"; then
      dump riak_cs_multibag_list_bags riak-cs supercluster list-members
      dump riak_cs_multibag_weight riak-cs supercluster weight
    fi

    # cluster-info writes its report to the file named on its command line.
    # The file is created up front and opened up to mode 666 for the duration
    # of the call, then made read-only. $CI is quoted throughout because it
    # is derived from the staging path, which comes from TMPDIR and the node
    # name.
    # NOTE(review): the 666 mode is presumably so a node running as another
    # user can write the report - confirm.
    CI="$(pwd)/cluster-info.html"
    touch "$CI"
    chmod 666 "$CI"
    dump cluster-info "$cs_bin_dir"/riak-cs-admin cluster-info "$CI"
    chmod 444 "$CI"
fi

###
### Gather Riak logs
###

# Runs only when the log section is enabled. Copies recently modified files
# from the log directory into logs/platform_log_dir of the staging area.
if [ 1 -eq $get_logs ]; then
    mkdir_or_die "${start_dir}"/"${debug_dir}"/logs/.info
    cd "${start_dir}"/"${debug_dir}"/logs

    # grab a listing of the log directory to show if there are crash dumps
    dump ls_log_dir ls -lhR "$cs_log_dir"

    # Get any logs in the platform_log_dir
    if [ -d "${cs_log_dir}" ]; then
        # check to ensure there is at least something to get
        ls -1 "${cs_log_dir}" 2>/dev/null | grep -q "log"
        if [ 0 -eq $? ]; then
            mkdir_or_die "${start_dir}"/"${debug_dir}"/logs/platform_log_dir

            # Mirror the directory, only copying files that match the pattern
            # (-mtime -N keeps files modified less than N*24 hours ago). In
            # the inline script, $0 is the destination directory and $1 the
            # file found; ${1%/*} is the file's directory part. The -mtime
            # operand is quoted so it always reaches find as a single word.
            cd "$cs_log_dir"
            find . -type f -mtime "-$log_mtime" -exec sh -c '
                mkdir -p "$0/${1%/*}";
                cp -p "$1" "$0/$1"
                ' "${start_dir}"/"${debug_dir}"/logs/platform_log_dir {} \;

            # Remove access logs
            if [ 1 -eq $skip_accesslog ]; then
                find "${start_dir}"/"${debug_dir}"/logs/platform_log_dir -type f -name 'access.log*' -exec rm {} \;
            fi
        fi
    fi
fi

###
### Gather Riak CS configuration
###

# Runs only when the configuration section is enabled. Copies the files under
# cs_etc_dir (minus excluded name patterns) and the generated configuration
# directory into config/ of the staging area.
if [ 1 -eq $get_cfgs ]; then
    mkdir_or_die "${start_dir}"/"${debug_dir}"/config/.info

    # Capture the needed files from the `cs_etc_dir` directory.
    cd $cs_etc_dir

    # Convert the list of file globs to a list of `! -name <glob> ` terms.
    # For example if `RIAK_EXCLUDE="*.key mySecretFile"`, this will set
    # `exclude="! -name *.key ! -name mySecretFile "`.
    # NOTE(review): $RIAK_EXCLUDE is expanded unquoted here and the assembled
    # command is eval'd below, so a pattern that does not carry its own quotes
    # (as the usage example `'*.key'` does) can be glob-expanded against the
    # files in this directory before find sees it - confirm this is intended.
    exclude=""
    for i in `echo $RIAK_EXCLUDE`; do
        exclude="${exclude}! -name $i "
    done
    # Unless --ssl-certs was given, also exclude the certificate/key
    # extensions. The single quotes are part of each word and stay in
    # $exclude until the eval below strips them.
    if [ 0 -eq $get_ssl_certs ]; then
        for i in `echo "'*.pfx' '*.p12' '*.csr' '*.sst' '*.sto' '*.stl' '*.pem' '*.key'"`; do
            exclude="${exclude}! -name $i "
        done
    fi

    # Compose the `find` command that will exclude the above list of files,
    # being aware that an empty `\( \)` will cause a failure in the `find`.
    # The command is built as a string: $exclude is expanded now, the escaped
    # ${start_dir}/${debug_dir} are expanded when the string is eval'd, and
    # the escaped $0/$1 are left for the inner `sh -c`, where $0 is the
    # destination directory and $1 the file found.
    if [ -z "$exclude" ]; then
        run="find . -type f -exec sh -c '
                mkdir -p \"\$0/\${1%/*}\";
                cp -a \"\$1\" \"\$0/\$1\"
            ' \"\${start_dir}\"/\"\${debug_dir}\"/config {} \;"
    else
        run="find . -type f \\( $exclude \\) -exec sh -c '
                mkdir -p \"\$0/\${1%/*}\";
                cp -a \"\$1\" \"\$0/\$1\"
            ' \"\${start_dir}\"/\"\${debug_dir}\"/config {} \;"
    fi
    eval $run

    # Copy the generated configurations into the archive.
    cd "${start_dir}"/"${debug_dir}"/config

    # Use dump to execute the copy. This will provide a progress dot and
    # capture any error messages.
    dump cp_generated_cfg cp -R "$cuttlefish_conf_dir" .

    # If the copy succeeded, then the output will be empty and it is unneeded.
    if [ 0 -eq $? ]; then
        rm -f cp_generated_cfg
    fi
fi

###
### Produce the output file
###

# One last sanity check before transferring or removing anything: the
# staging directory must exist, and debug_dir must be non-empty so that the
# rm -rf below can never be handed an empty path.
cd "${start_dir}"
if [ -z "$debug_dir" ] || [ ! -d "$debug_dir" ]; then
    echoerr "Couldn't find ${start_dir}/${debug_dir}. Aborting"
    exit 1
fi

# Archive the staging directory by its relative name so the tarball unpacks
# into a single top-level directory. Because of the cd above, a relative
# $outfile is resolved against ${start_dir}.
if [ '-' = "$outfile" ]; then
    # So we don't get a file literally named -
    tar zcf - "${debug_dir}"
else
    tar zcf "$outfile" "${debug_dir}"

    # provide some indication of the output filename. The name is passed as
    # an argument, never as the printf format, so a '%' or backslash in the
    # file name is printed literally.
    printf ' %s' "$outfile" 1>&2
fi
rm -rf "${debug_dir}"

# keep things looking pretty
echoerr ""
