#!/usr/bin/bash
# Check_MK Agent for Solaris
# +------------------------------------------------------------------+
# |             ____ _               _        __  __ _  __           |
# |            / ___| |__   ___  ___| | __   |  \/  | |/ /           |
# |           | |   | '_ \ / _ \/ __| |/ /   | |\/| | ' /            |
# |           | |___| | | |  __/ (__|   <    | |  | | . \            |
# |            \____|_| |_|\___|\___|_|\_\___|_|  |_|_|\_\           |
# |                                                                  |
# | Copyright Mathias Kettner 2013             mk@mathias-kettner.de |
# +------------------------------------------------------------------+
#
# This file is part of Check_MK.
# The official homepage is at http://mathias-kettner.de/check_mk.
#
# check_mk is free software;  you can redistribute it and/or modify it
# under the  terms of the  GNU General Public License  as published by
# the Free Software Foundation in version 2.  check_mk is  distributed
# in the hope that it will be useful, but WITHOUT ANY WARRANTY;  with-
# out even the implied warranty of  MERCHANTABILITY  or  FITNESS FOR A
# PARTICULAR PURPOSE. See the  GNU General Public License for more de-
# tails. You should have  received  a copy of the  GNU  General Public
# License along with GNU Make; see the file  COPYING.  If  not,  write
# to the Free Software Foundation, Inc., 51 Franklin St,  Fifth Floor,
# Boston, MA 02110-1301 USA.

# Force the C locale to eliminate localized outputs where possible
LC_ALL=C
export LC_ALL
unset LANG

# Agent directories; a non-empty value from the environment wins over
# the built-in default
: "${MK_LIBDIR:=/usr/lib/check_mk_agent}"
: "${MK_CONFDIR:=/etc/check_mk}"
: "${MK_VARDIR:=/var/lib/check_mk_agent}"
export MK_LIBDIR MK_CONFDIR MK_VARDIR

# Optionally set a tempdir for all subsequent calls
#export TMPDIR=

# All executables in PLUGINSDIR will simply be executed and their
# output appended to the output of the agent. Plugins define their own
# sections and must output headers with '<<<' and '>>>'
PLUGINSDIR="${MK_LIBDIR}/plugins"

# All executables in LOCALDIR will be executed and their output
# inserted into the section <<<local>>>. Please refer to the online
# documentation for details.
LOCALDIR="${MK_LIBDIR}/local"

# With -d as first argument every command is traced (debug mode).
# Otherwise standard input is closed (for security reasons) and
# stderr is discarded.
case "$1" in
    -d)
        set -xv
        ;;
    *)
        exec <&- 2>/dev/null
        ;;
esac

# Print the age of a regular file in seconds: start time of the perl
# process minus the modification time of the file.
# Arguments: $1 - path of the file
# If $1 is not a regular file perl dies, i.e. nothing is printed on
# stdout and the exit status is non-zero.
file_age() {
    /usr/bin/perl -e '-f $ARGV[0] or die "0000000"; print($^T - (stat($ARGV[0]))[9]);' "$1"
}


# Run one MRPE check and print it as a complete <<<mrpe>>> section.
# Arguments: $1   - service description
#            $2.. - command line; joined into one string and eval'ed
# Output:    the section header, then "<descr> <exit code> <output>" on a
#            single line; newlines of the plugin output become \1 bytes.
# Globals:   the plugin output is stored in OUTPUT.
run_mrpe() {
    local descr=$1
    shift
    local cmdline="$@"
    local rc

    echo '<<<mrpe>>>'

    OUTPUT=$(eval "$cmdline")
    rc=$?

    printf '%s' "$descr $rc $OUTPUT" | tr \\n \\1
    echo
}

# Exported because run_cached starts a separate bash that calls run_mrpe
export -f run_mrpe


# Runs a command asynchronously by use of a cache file.
#
# Usage: run_cached [-s|-m|-ma] NAME MAXAGE COMMAND...
#   -s       the cached command is prefixed with an echo of the section
#            header '<<<NAME>>>' (only one shift is done, so the section
#            name is NAME as well)
#   -m       MRPE mode: the cache file is named mrpe_NAME.cache and the
#            command is run through run_mrpe
#   -ma      like -m; additionally "(Cached: AGE/MAXAGEs)" is inserted into
#            the second line of the cached output
#   NAME     base name of the cache file below $MK_VARDIR/cache
#   MAXAGE   age in seconds up to which the cache file counts as recent
#   COMMAND  command line producing the output
#
# A non-empty cache file is always printed, even when outdated. If it is
# missing or outdated and no $CACHEFILE.new exists, a background bash is
# started that writes the command output to $CACHEFILE.new and renames it
# to $CACHEFILE on success (on failure both files are removed).
# Sets the global variables CACHEFILE and AGE.
function run_cached () {
    local mrpe=0
    local append_age=0
    # TODO: this function is unable to handle multiple args at once
    #       for example: -s -m won't work, it is read as single token "-s -m"

    if [ "$1" = -s ] ; then local section="echo '<<<$2>>>' ; " ; shift ; fi
    if [ "$1" = -m ] ; then local mrpe=1 ; shift ; fi
    if [ "$1" = "-ma" ] ; then local mrpe=1 ; local append_age=1 ; shift ; fi
    local NAME=$1
    local MAXAGE=$2
    shift 2
    # $section is only set (and local) when -s was given, otherwise empty
    local CMDLINE="$section$@"

    if [ ! -d $MK_VARDIR/cache ]; then mkdir -p $MK_VARDIR/cache ; fi
    if [ "$mrpe" = 1 ] ; then
        CACHEFILE="$MK_VARDIR/cache/mrpe_$NAME.cache"
    else
        CACHEFILE="$MK_VARDIR/cache/$NAME.cache"
    fi

    # Check if the creation of the cache takes suspiciously long: if the age
    # of $CACHEFILE.new (modification time, see file_age) is at least twice
    # the MAXAGE, kill the processes using the file and remove it
    if [ -e "$CACHEFILE.new" ] ; then
        AGE=$(file_age "$CACHEFILE.new")
        if [ $AGE -ge $((MAXAGE * 2)) ] ; then
            fuser -k "$CACHEFILE.new" >/dev/null 2>&1
            rm -f "$CACHEFILE.new"
        fi
    fi

    # Check if cache file exists and is recent enough
    if [ -s "$CACHEFILE" ] ; then
        AGE=$(file_age "$CACHEFILE")
        # USE_CACHEFILE is only set when the cache is recent; it suppresses
        # the start of a new job below
        if [ $AGE -le $MAXAGE ] ; then local USE_CACHEFILE=1 ; fi
        # Output the file in any case, even if it is
        # outdated. The new file will not yet be available
        if [ $append_age -eq 1 ] ; then
            # insert the cached-string before the pipe (first -e)
            # or, if no pipe found (-e t) append it (third -e),
            # but only once and on the second line (2) (first line is section header,
            # all further lines are long output)
            cat "$CACHEFILE" | sed -e "2s/|/ (Cached: ${AGE}\/${MAXAGE}s)|/" -e t -e "2s/$/ (Cached: ${AGE}\/${MAXAGE}s)/"
        else
            cat "$CACHEFILE"
        fi
    fi

    # Cache file outdated and new job not yet running? Start it.
    # The job is a command string piped into a detached bash; noclobber makes
    # the "exec >" redirection fail if $CACHEFILE.new already exists.
    # NOTE(review): $CMDLINE is embedded into the command string as is (in the
    # MRPE case inside double quotes), so it is parsed again by the child bash.
    if [ -z "$USE_CACHEFILE" ] && [ ! -e "$CACHEFILE.new" ] ; then
        if [ $mrpe -eq 1 ] ; then
            echo "set -o noclobber ; exec > \"$CACHEFILE.new\" || exit 1 ; run_mrpe $NAME \"$CMDLINE\" && mv \"$CACHEFILE.new\" \"$CACHEFILE\" || rm -f \"$CACHEFILE\" \"$CACHEFILE.new\"" | nohup /usr/bin/bash >/dev/null 2>&1 &
        else
            echo "set -o noclobber ; exec > \"$CACHEFILE.new\" || exit 1 ; $CMDLINE && mv \"$CACHEFILE.new\" \"$CACHEFILE\" || rm -f \"$CACHEFILE\" \"$CACHEFILE.new\"" | nohup /usr/bin/bash >/dev/null 2>&1 &
        fi
    fi
}


# Header section: agent version and operating system
printf '%s\n' \
    '<<<check_mk>>>' \
    'Version: 1.5.0p11' \
    'AgentOS: solaris'


# Find out what zone we are running in.
# Systems without the zonename command (pre-Solaris 10) are treated as
# "global" and ps is asked for all processes (-A) instead of one zone.
zonename="global"
pszone="-A"
if type zonename >/dev/null 2>&1; then
    zonename=$(zonename)
    pszone="-z $zonename"
fi


# Get statistics about monitored jobs. Below the job directory there
# is a sub directory per user that ran a job. That directory must be
# owned by the user so that a symlink or hardlink attack for reading
# arbitrary files can be avoided. All files are listed and read via su,
# i.e. as the user the directory is named after.
# The path is quoted so that a MK_VARDIR containing blanks works.
if pushd "$MK_VARDIR/job" >/dev/null; then
    echo '<<<job>>>'
    for username in *
    do
        if [ -d "$username" ] && cd "$username" ; then
            count=$(su "$username" -c "ls -1 * | wc -l")

            # With exactly one file the "==> name <==" header is printed here
            # (presumably because head omits it for a single file)
            if [ "$count" -eq "1" ]; then
                filename=$(su "$username" -c "ls -1 *")
                echo "==> $filename <=="
            fi

            su "$username" -c "head -n1000 *"
            cd ..
        fi
    done
    popd > /dev/null
fi




# Filesystem usage for the local non-ZFS filesystem types listed below.
# The header line of df is dropped, entries whose first column looks like
# a shared library below /lib/ (*.so.1) are skipped, and the size column
# is recomputed as used + avail.
echo '<<<df>>>'
for fs in ufs vxfs samfs lofs tmpfs; do
    df -l -k -F $fs 2>/dev/null \
        | sed 1d \
        | grep -v "^[^ ]*/lib/[^ ]*\.so\.1 " \
        | while read device total used avail capacity mountpoint; do
            total=$(($used + $avail))
            echo "$device $fs $total $used $avail $capacity $mountpoint"
        done
done

# Filesystem usage for ZFS
if type zfs &>/dev/null
then
    echo '<<<zfsget>>>'
    # First try the property "usedbydataset"; if zfs fails (presumably
    # because that property is unknown to this ZFS version) fall back to
    # "referenced". The decision must be based on the exit code of zfs,
    # not on that of sed, hence PIPESTATUS instead of $?.
    zfs get -Hp name,usedbydataset,avail,mountpoint,type 2>/dev/null | sed 's/usedbydataset/used/g'
    if [ "${PIPESTATUS[0]}" -ne 0 ] ; then
        zfs get -Hp name,referenced,avail,mountpoint,type | sed 's/referenced/used/g'
    fi
    # Sub section with the df view of the ZFS filesystems (header line dropped)
    echo '[df]'
    df -l -k -F zfs 2>/dev/null | sed 1d
fi

# ZFS arc cache
# newer Solaris (>=11.3) do not provide hits and misses via mdb -k,
# so kstat is tried first and mdb is only the fallback.
# The kstat lines are printed as "<statistic> = <value>".
echo '<<<zfs_arc_cache>>>'
if type kstat >/dev/null 2>&1; then
    kstat -p zfs:0:arcstats \
        | sed 's/^.*arcstats://' \
        | awk '{ print $1 " = " $2 }'
elif type mdb >/dev/null 2>&1; then
    echo '::arc' | mdb -k
fi

# Processes
echo '<<<ps>>>'
# The default solaris ps command strips the command lines of the processes. But for good process
# matching on the server we really need to whole command line. On linux there are arguments to
# make ps output the whole command line, but on solaris this seems to be missing. We use the ucb
# ps command to get the full command line instead. What a hack.
if [ -x /usr/ucb/ps ]; then
    UCB_PS=$(/usr/ucb/ps -agwwwx)
    # Rewrite each ps line to "(user,vsz,rss,pcpu/etime,pid) args"
    PS=$(ps -o user,vsz,rss,pcpu,etime,pid,args $pszone | \
         sed -e 1d -e 's/ *\([^ ]*\) *\([^ ]*\) *\([^ ]*\) *\([^ ]*\) *\([^ ]*\) *\([^ ]*\) */(\1,\2,\3,\4\/\5,\6) /')
    while read -r LINE; do
        # The statistics prefix ends at the FIRST ") "; the command line
        # itself may contain further ") " sequences, so strip the longest
        # matching suffix (%%). The PID is the last comma separated field.
        STATS=${LINE%%) *}
        PID=${STATS##*,}

        # Directly use the ps output when the line is short (presumably too
        # short to have been stripped by ps)
        if [ ${#LINE} -lt 100 ]; then
            echo "$LINE"
            continue
        fi

        # Look up the process in the ucb ps output and keep everything from
        # the 5th column on as command line
        CMD=$(echo "$UCB_PS" | grep "^[ ]*$PID " | head -n1 | \
              awk '{ s = ""; for (i = 5; i <= NF; i++) s = s $i " "; print s }')
        # Only use the ucb ps line when it's not empty (process might already been gone)
        if [ -z "$CMD" ]; then
            echo "$LINE"
        else
            echo "${STATS}) ${CMD}"
        fi
    done <<< "$PS"
else
    # Without ucb ps: rewrite each line to "(user,vsz,rss,pcpu) args"
    ps -o user,vsz,rss,pcpu,args $pszone | \
        sed -e 1d -e 's/ *\([^ ]*\) *\([^ ]*\) *\([^ ]*\) *\([^ ]*\) */(\1,\2,\3,\4) /'
fi


# Statgrab
# source: http://www.i-scream.org/libstatgrab/
# binary: http://www.opencsw.org/
if type statgrab &>/dev/null
then
    statgrab_vars="const. cpu. disk. general. mem. page. swap. user."
    statgrab_sections="cpu disk page"

    # Collect net stats in the global zone and in local zones if dlstat is present.
    if [ "$zonename" == "global" ] || type dlstat &>/dev/null
    then
        statgrab_vars="$statgrab_vars net."
        statgrab_sections="$statgrab_sections net"
    fi

    # statgrab is run once (lines containing "md" are dropped). Its output
    # is kept in a variable instead of a file with a predictable name in
    # /tmp, which could be prepared as a symlink by another user.
    statgrab_output=$(statgrab $statgrab_vars | grep -v md)

    # One section per group; the group prefix is cut off and " = " is
    # replaced by a single blank
    for s in $statgrab_sections
    do
        echo "<<<statgrab_$s>>>"
        printf '%s\n' "$statgrab_output" | grep "^$s\." | cut -d. -f2-99 | sed 's/ *= */ /'
    done

    # <<<statgrab_mem>>> info is preferred over <<<solaris_mem>>>
    # since solaris_mem is under suspicion to be buggy.
    echo '<<<statgrab_mem>>>'
    printf '%s\n' "$statgrab_output" | egrep "^(swap|mem)\." | sed 's/ *= */ /'
fi


# /proc/cpu
# Simulated output of Linux /proc/cpu: the three load averages taken
# from uptime, "1/<number of lines printed by ps>", the PID of this
# agent and the number of lines printed by psrinfo.
echo '<<<cpu>>>'
load_regex='.*average: \([0-9]\{1,\}\.[0-9]\{1,\}\), \([0-9]\{1,\}\.[0-9]\{1,\}\), \([0-9]\{1,\}\.[0-9]\{1,\}\).*'
load=$(uptime | sed -e "s;${load_regex};\1 \2 \3;")
# Arithmetic expansion strips the blanks wc may print around the number
ps=$(ps -o comm $pszone | wc -l)
ps=$((ps))
procs=$(psrinfo | wc -l)
procs=$((procs))
echo $load 1/$ps $$ $procs


# zpool status
# The output of "zpool status -x" is collected through run_cached
# (max age 120 seconds), the pool list is queried directly.
if test -x /sbin/zpool; then
    run_cached -s zpool_status 120 '/sbin/zpool status -x'

    printf '%s\n' '<<<zpool>>>'
    zpool list
fi


# /proc/uptime
# Counterpart of Linux /proc/uptime: prints the value of the kstat
# statistic unix:::boot_time (second column of the matching line).
echo '<<<uptime>>>'
btime=$(kstat -p unix:::boot_time 2>&1 | awk '/boot_time/ { print $2 }')
echo $btime


# NTP
# Only emitted when a process whose name ends in the word "ntpd" is
# running. The pattern is quoted: unquoted it would be expanded by the
# shell if a file matching the glob .*ntpd exists in the current directory.
if ps -o comm $pszone | grep -w '.*ntpd' >/dev/null 2>&1
then
    echo '<<<ntp>>>'
    # Drop the two header lines, put a blank behind the first character of
    # each line and replace a blank first character by '%'
    ntpq -np | sed -e 1,2d -e 's/^\(.\)/\1 /' -e 's/^ /%/'
fi


# Memory
# <<<solaris_mem>>> should be used if statgrab is missing and top is available.
# The first executable top found in the candidate list is used, and exactly
# that binary is run; its "Memory:" line is the section content.
if ! type statgrab &>/dev/null
then
    for top_bin in /usr/bin/top /usr/local/bin/top
    do
        if [ -x "$top_bin" ]
        then
            echo "<<<solaris_mem>>>"
            "$top_bin" | grep '^Memory:'
            break
        fi
    done
fi

# Hardware diagnostics: only the exit code of prtdiag is reported,
# collected through run_cached with a max age of 300 seconds.
# prtdiag does not work in local zones, so this is limited to the
# global zone.
if type prtdiag >/dev/null 2>&1 && [ "$zonename" = "global" ]; then
    run_cached -s solaris_prtdiag_status 300 '/usr/sbin/prtdiag 1>/dev/null 2>&1; echo $?'
fi

# TCP Connection stats
# Counts the netstat lines per value of their 7th column; the first
# four lines of the netstat output are skipped.
echo '<<<tcp_conn_stats>>>'
netstat -n -a -f inet -P tcp \
    | nawk 'NR > 4 { count[$7]++ }
            END { for (state in count) print state, count[state] }'


# Multipathing (only in the global zone)
# The mpathadm output is processed in groups of three lines, each group
# is condensed to "<first word of line 1> <last word of line 2> <last
# word of line 3>".
if type mpathadm >/dev/null 2>&1 && [ "$zonename" = "global" ]; then
    echo '<<<solaris_multipath>>>'
    mpathadm list LU | nawk '
        NR % 3 == 1 { dev = $1 }
        NR % 3 == 2 { tc = $NF }
        NR % 3 == 0 { printf "%s %s %s\n", dev, tc, $NF }'
fi


# Fileinfo-Check: put patterns for files into $MK_CONFDIR/fileinfo.cfg
#
# Replace the date variable of the input, e.g. $DATE:%Y%m%d$, by
# the current date. If there's no match just return the input.
# Arguments: $1 - file name (pattern), may contain $DATE:<format>$
# Output:    $1 with the date variable replaced by the output of
#            date +<format>
function replace_datevariable()
{
    local file_name="$1"
    local pattern='(\$DATE:(.*)\$)'
    local date_variable format_string current_date result

    if [[ ! $file_name =~ $pattern ]]; then
        echo "$file_name"
    else
        date_variable="${BASH_REMATCH[1]}"
        format_string="${BASH_REMATCH[2]}"
        # Quoted so that a format containing blanks reaches date as one word
        current_date=$(date "+$format_string")
        # The quoted pattern is replaced literally instead of being
        # interpreted as a glob
        result=${file_name/"$date_variable"/$current_date}
        echo "$result"
    fi
}

# Section with size and modification time of the files matching the
# patterns in fileinfo.cfg. First line: current time; then one line
# "<file>|<size>|<mtime>" per regular file or "<file>|missing|<time>".
if [ -f "$MK_CONFDIR/fileinfo.cfg" ]
then
    echo '<<<fileinfo:sep(124)>>>'
    /usr/bin/perl -e 'print time."\n"'

    # note: patterns in the fileinfo.cfg are resolved inside this loop.
    # IFS is reduced to newline so that patterns and file names
    # containing blanks are not split.
    OLD_IFS=$IFS
    IFS='
'
    while read -r pattern; do
        # only lines starting with a slash are treated as patterns
        [[ $pattern == /* ]] || continue

        pattern=$(replace_datevariable "$pattern")
        for file in $pattern; do
            if [ -f "$file" ]; then
                /usr/bin/perl -e '
                    my @st = stat($ARGV[0]);
                    print("$ARGV[0]|$st[7]|$st[9]\n");
                ' "$file"
            else
                echo "$file|missing|$(/usr/bin/perl -e 'print time')"
            fi
        done
    done < "$MK_CONFDIR/fileinfo.cfg"
    IFS=$OLD_IFS
fi

# Libelle Business Shadow: output of "trd -s", if trd is available
if type trd >/dev/null 2>&1; then
    printf '%s\n' '<<<libelle_business_shadow:sep(58)>>>'
    trd -s
fi

# Displaying Information About Faults or Defects
# If there are no faults the output of fmadm will be empty, i.e. the
# section consists of its header only.
if type fmadm >/dev/null 2>&1; then
    printf '%s\n' '<<<solaris_fmadm:sep(58)>>>'
    fmadm faulty
fi

# Getting Information About Services Running on Solaris
# 'svcs -a' lists all service instances, including disabled or
# incomplete ones.
if type svcs >/dev/null 2>&1; then
    printf '%s\n' '<<<solaris_services>>>'
    svcs -a
fi

# MK's Remote Plugin Executor
# Every line of mrpe.cfg that is neither empty nor a comment has the form
#   <description> [(<key>=<value>[:<key>=<value>...])] <command line>
# With the key "interval" the check is run through run_cached with that
# max age, otherwise directly through run_mrpe. The key "appendage"
# switches run_cached from -m to -ma.
if [ -f "$MK_CONFDIR/mrpe.cfg" ]; then
    echo '<<<mrpe>>>'
    grep -v '^ *#' "$MK_CONFDIR/mrpe.cfg" \
        | grep -v '^ *$' \
        | while read descr cmdline; do
            interval=
            args="-m"
            if [[ $cmdline =~ \(([^\)]*)\)[[:space:]](.*) ]]; then
                parameters=${BASH_REMATCH[1]}
                cmdline=${BASH_REMATCH[2]}

                # the assignments are separated by colons
                for par in $(echo $parameters | tr ":" "\n"); do
                    key=$(echo $par | cut -d= -f1)
                    value=$(echo $par | cut -d= -f2)

                    case "$key" in
                        interval)  interval=$value ;;
                        appendage) args="-ma" ;;
                    esac
                done
            fi

            if [ -n "$interval" ]; then
                run_cached $args $descr $interval "$cmdline"
            else
                run_mrpe $descr "$cmdline"
            fi
        done
fi

# Local checks
# The non-empty test guards against an unset LOCALDIR: "cd" without an
# argument would switch to $HOME and run the executables found there.
if [ -n "$LOCALDIR" ] && cd "$LOCALDIR" 2>/dev/null
then
    echo '<<<local>>>'
    # A glob is used instead of parsing ls so that file names containing
    # blanks work. Only regular files are run: directories (such as the
    # interval directories below) pass the -x test as well.
    for skript in *
    do
        if [ -f "$skript" ] && [ -x "$skript" ] ; then
            "./$skript"
        fi
    done

    # Call some plugins only every X'th second: executables in a sub
    # directory whose name starts with a digit 1-9 are run through
    # run_cached, the directory name is passed as max age. In the cache
    # name the slash of the path is replaced by '#'.
    for skript in [1-9]*/* ; do
        if [ -f "$skript" ] && [ -x "$skript" ] ; then
            cache_name=local_${skript//\//\#}
            run_cached "$cache_name" "${skript%/*}" "$skript"
        fi
    done

fi

# Plugins
# The non-empty test guards against an unset PLUGINSDIR: "cd" without an
# argument would switch to $HOME and run the executables found there.
if [ -n "$PLUGINSDIR" ] && cd "$PLUGINSDIR" 2>/dev/null
then
    # A glob is used instead of parsing ls so that file names containing
    # blanks work. Only regular files are run: directories (such as the
    # interval directories below) pass the -x test as well.
    for skript in *
    do
        if [ -f "$skript" ] && [ -x "$skript" ] ; then
            "./$skript"
        fi
    done

    # Call some plugins only every X'th second: executables in a sub
    # directory whose name starts with a digit 1-9 are run through
    # run_cached, the directory name is passed as max age. In the cache
    # name the slash of the path is replaced by '#'.
    for skript in [1-9]*/* ; do
        if [ -f "$skript" ] && [ -x "$skript" ] ; then
            cache_name=plugins_${skript//\//\#}
            run_cached "$cache_name" "${skript%/*}" "$skript"
        fi
    done
fi

