#!/bin/ksh
###############################################################################
# File: /home/msampaio/qhas_linux.sh
#
# Purpose: To have a common secure local/remote cluster status monitor which
#          does not use clinfo and provides a different degree of flexibility
#          than clstat - in one tool
#
# Description: A 'clstat' alternative monitoring script. See Usage.
#   Differences to clstat :
#     1/. Uses ssh rather than clinfo. Unprompted ssh access must be configured
#         - prior to running this script
#     2/. Designed to be configurable by the end user
#     3/. Displays the internal cluster mgr state [-i]
#     4/. Cmd line script, produces both text std out and cgi
#         - for color display via web browser (refresh 5 secs)
#     5/. Output can be changed to remove network/address information [-n]
#     6/. Can be run as a one off report [-1], will loop by default
#     7/. Will currently monitor only one cluster
#         - future enhancements to follow..
#
# Version: 1.005
#
# Author: Alex Abderrazag, IBM UK Ltd.
###############################################################################
# Modified in 02/05/2013 to run in Linux servers using only snmp.
#
# Visual and sound alert added.
#
# Author: Marcos Jean Sampaio (msampaio@br.ibm.com)
###############################################################################

# Print the command-line synopsis on stdout and terminate the script with
# exit status 1.
# Globals: PROGNAME (read) - name shown in the synopsis line.
usage()
{
    # The program name is passed as an argument so it is never interpreted
    # as part of the printf format.
    printf 'Usage: %s [-n] [-1] [-i]\n' "$PROGNAME"
    printf '\t-n Omit Network info\n'
    printf '\t-1 Display 1 report rather than loop\n'
    printf '\t-i Displays the internal state of cluster manager\n'
    exit 1
}

###############################################################################
#
# Global VARs
#
###############################################################################
#*******************Please Alter the VARs below as appropriate*****
#LOGFILE="/tmp/$NODE.qhaslog"
#HTMLFILE="/tmp/$NODE.qhashtml"
#CGIFILE="/home/msampaio/www/nmon2chart-cgi/$NODE.cgi"
#CLHOSTS="/usr/es/sbin/cluster/etc/clhosts"
#USER=root # to be used for ssh access
SNMPCOMM=public
#******ONLY alter the code below this line, if you want to change******
#********************the behaviour of this script**********************
INTERNAL=0
PROGNAME=$(basename "${0}")
# The snmpwalk calls in this script read COMMUNITY; derive it from the
# user-editable SNMPCOMM above so that changing SNMPCOMM has an effect.
COMMUNITY="$SNMPCOMM"
# Whitespace-separated list of hosts that main() polls in order.
NODES="10.12.0.95 10.14.0.185 10.14.0.107"
#export PATH=$(/usr/es/sbin/cluster/utilities/cl_get_path all)
#HA_DIR="$(cl_get_path)"

# set up some global variables with SNMP branch info
# cluster
CLUSTER_BRANCH="1.3.6.1.4.1.2.3.1.2.1.5.1"
CLUSTER_NAME="$CLUSTER_BRANCH.2"
CLUSTER_STATE="$CLUSTER_BRANCH.4"
CLUSTER_SUBSTATE="$CLUSTER_BRANCH.8"
CLUSTER_NUM_NODES="$CLUSTER_BRANCH.11"
# node
NODE_BRANCH="1.3.6.1.4.1.2.3.1.2.1.5.2.1.1"
NODE_ID="$NODE_BRANCH.1"
NODE_STATE="$NODE_BRANCH.2"
NODE_NUM_IF="$NODE_BRANCH.3"
NODE_NAME="$NODE_BRANCH.4"
# network
NETWORK_BRANCH="1.3.6.1.4.1.2.3.1.2.1.5.4.1.1"
NETWORK_ID="$NETWORK_BRANCH.2"
NETWORK_NAME="$NETWORK_BRANCH.3"
NETWORK_ATTRIBUTE="$NETWORK_BRANCH.4"
NETWORK_STATE="$NETWORK_BRANCH.5"
# address
ADDRESS_BRANCH="1.3.6.1.4.1.2.3.1.2.1.5.3.1.1"
ADDRESS_IP="$ADDRESS_BRANCH.2"
ADDRESS_LABEL="$ADDRESS_BRANCH.3"
ADDRESS_NET="$ADDRESS_BRANCH.5"
ADDRESS_STATE="$ADDRESS_BRANCH.6"
ADDRESS_ACTIVE_NODE="$ADDRESS_BRANCH.7"
# resource group
RG_BRANCH="1.3.6.1.4.1.2.3.1.2.1.5.11"
RG_ID="$RG_BRANCH.1.1.1"
RG_NAME="$RG_BRANCH.1.1.2"
RG_NODE_STATE="$RG_BRANCH.3.1.3"
###############################################################################
#
# Name: format_cgi
#
# Create the cgi (on the fly!)
#
# Writes a ksh CGI script to $CGIFILE and makes it executable.  When the
# generated script runs it colourises the report in $HTMLFILE with sed, stores
# the result in $STATFILE, appends an <audio> alert if the report contains
# DOWN, and prints an HTML page that embeds $STATFILE.
#
# Globals read: CGIFILE, HTMLFILE, STATFILE
#
###############################################################################
format_cgi()
{
    printf '%s\n' '#!/bin/ksh' 'print "Content-type: text/html\n";' > "$CGIFILE"

    # The here-document delimiter is deliberately unquoted: $HTMLFILE and
    # $STATFILE are expanded now and baked into the generated script, while
    # \$? is escaped so that it is evaluated when the CGI itself runs.
    # Each sed stage sits on its own line; a trailing '|' continues the
    # pipeline in the generated script.
    cat >> "$CGIFILE" <<EOF
cat $HTMLFILE |
sed '1s:^:<h3>:' |
sed '1s:$:</h3>:' |
sed 's:UNSTABLE:<font color="#FDD017"><blink>UNSTABLE</blink><font color="#ffffff">:g' |
sed 's:JOINING:<font color="#FDD017"><blink>JOINING</blink><font color="#ffffff">:g' |
sed 's:LEAVING:<font color="#FDD017"><blink>LEAVING</blink><font color="#ffffff">:g' |
sed 's: STABLE:<font color="#00FF00"> STABLE<font color="#ffffff">:g' |
sed 's/qn:/<font color="#2B65EC">qn:<font color="#ffffff">/g' |
sed 's:UP:<font color="#00FF00">UP<font color="#ffffff">:g' |
sed 's:DOWN:<font color="#FF0000"><blink>DOWN</blink><font color="#ffffff">:g' |
sed 's:ONLINE:<font color="#00FF00">ONLINE<font color="#ffffff">:g' |
sed 's:OFFLINE:<font color="#0000FF"><blink>OFFLINE</blink><font color="#ffffff">:g' > $STATFILE
grep DOWN $HTMLFILE > /dev/null
if [ "\$?" -eq 0 ];then
echo "<audio autoplay loop>
<source src="../clsmon/tos-redalert.wav" />
<audio/>" >> $STATFILE
fi
cat << EOM


<HTML>
<META HTTP-EQUIV="REFRESH" CONTENT="10">
<HEAD><TITLE>HACMP Cluster Status - a Mancunian production </TITLE>
<script type="text/javascript">
<!--
spe=500;
na=document.all.tags("blink");
swi=1;
bringBackBlinky();
function bringBackBlinky() {
if (swi == 1) {
sho="visible";
swi=0;
}
else {
sho="hidden";
swi=1;
}
for(i=0;i<na.length;i++) {
na[i].style.visibility=sho;
}
setTimeout("bringBackBlinky()", spe);
}
-->
</script>
<BODY COLOR="#ffffff" LINK="red" VLINK="blue" BGCOLOR="#000000">
<PRE>
<font COLOR="#ffffff">
<HR SIZE=3>
<font size="3.5"><font face="verdana">
<div style="padding-left: 11em;">
EOM
cat $STATFILE
echo "<div/>"
EOF
    chmod 755 "$CGIFILE"
}
###############################################################################
#
# Name: print_address_info
#
# Prints the address information for the node and network given in the
# environment
#
# Globals read: ADDRESS_MIB_FUNC (address rows, "OID = value" per line),
#               ADDRESS_IP, ADDRESS_NET, ADDRESS_ACTIVE_NODE, ADDRESS_LABEL,
#               ADDRESS_STATE, node_id, net_id, VERBOSE_LOGGING
# Output:       one "<tab>address label state" line per address that belongs
#               to network $net_id and whose active node is $node_id
#
###############################################################################
print_address_info()
{
    [[ "$VERBOSE_LOGGING" = "high" ]] && set -x

    # Get key (IP addresses) from MIB.  sort -u is used because uniq only
    # collapses adjacent duplicates and the input is not sorted yet.
    addresses=$(printf '%s\n' "$ADDRESS_MIB_FUNC" |
        grep -w "$ADDRESS_IP.$node_id" | sort -u | cut -f3 -d" ")

    # Get the active Node for each IP address
    for address in $addresses
    do
        address_net_id=$(printf '%s\n' "$ADDRESS_MIB_FUNC" |
            grep -w "$ADDRESS_NET.$node_id.$address" | cut -f3 -d" ")
        # Only addresses on the network currently being printed.
        [[ "$address_net_id" = "$net_id" ]] || continue

        active_node=$(printf '%s\n' "$ADDRESS_MIB_FUNC" |
            grep -w "$ADDRESS_ACTIVE_NODE.$node_id.$address" | cut -f3 -d" ")
        # Only addresses that are currently active on this node.
        [[ "$active_node" = "$node_id" ]] || continue

        # The label is a quoted string, so take the text between the quotes.
        address_label=$(printf '%s\n' "$ADDRESS_MIB_FUNC" |
            grep -w "$ADDRESS_LABEL.$node_id.$address" | cut -f2 -d\")
        address_state=$(printf '%s\n' "$ADDRESS_MIB_FUNC" |
            grep -w "$ADDRESS_STATE.$node_id.$address" | cut -f3 -d" ")

        # Quoted so an empty label still occupies its own column.
        printf "\t%-15s %-20s " "$address" "$address_label"
        case $address_state in
            2) printf "UP\n" ;;
            4) printf "DOWN\n" ;;
            *) printf "UNKNOWN\n" ;;
        esac
    done
}
###############################################################################
#
# Name: print_rg_info
#
# Prints the online RG status info.
#
# Walks the resource-group state and name tables on $NODE and prints one line
# for every resource group whose state on node $node_id is ONLINE (2),
# LEAVING (32) or JOINING (16).  Groups in any other state are not listed.
#
# Globals read:    COMMUNITY, NODE, RG_NODE_STATE, RG_NAME, node_id
# Globals written: RG_MIB_FUNC (raw output of the state walk)
#
###############################################################################
print_rg_info()
{
    # One walk per table; both are reused for every resource group below
    # instead of issuing a new snmpwalk per group.
    RG_MIB_FUNC=$(snmpwalk -r 2 -t 60 -c "$COMMUNITY" -O faUnQ -v 1 "$NODE" "$RG_NODE_STATE" 2> /dev/null)
    rg_name_mib=$(snmpwalk -r 2 -t 60 -c "$COMMUNITY" -O faUnQ -v 1 "$NODE" "$RG_NAME" 2> /dev/null)

    echo ""
    echo " Resource Groups :"

    # Each name row looks like: <RG_NAME>.<rg id> = "<name>".
    # The trailing OID component is used as the resource-group id, the same
    # index that selects the state row <RG_NODE_STATE>.<rg id>.<node id>.
    printf '%s\n' "$rg_name_mib" | while IFS= read -r rg_line
    do
        rg_oid=${rg_line%% *}
        rg_id=${rg_oid##*.}
        case $rg_id in
            '' | *[!0-9]*) continue ;;   # blank line or not a table row
        esac

        rg_name=$(printf '%s\n' "$rg_line" | awk '{print $3}' | sed 's/"//g')

        # Exact OID comparison (with or without the leading dot) so that
        # neither the OID nor the state value can match as a mere prefix.
        rg_state=$(printf '%s\n' "$RG_MIB_FUNC" |
            awk -v oid="$RG_NODE_STATE.$rg_id.$node_id" \
                '$1 == oid || $1 == ("." oid) { print $3; exit }')

        case $rg_state in
            2)  rg_label="ONLINE" ;;
            32) rg_label="LEAVING" ;;
            16) rg_label="JOINING" ;;
            *)  continue ;;
        esac
        printf ' %s  State: %s\n' "$rg_name" "$rg_label"
    done
}
###############################################################################
#
# Name: print_network_info
#
# Prints the network information for the node given in the environment
#
# For every network id found for $node_id, prints a blank line, the network
# name and its state, then calls print_address_info for that network.
#
# Globals read:    NETWORK_MIB_FUNC, NETWORK_ID, NETWORK_NAME,
#                  NETWORK_ATTRIBUTE, NETWORK_STATE, node_id, VERBOSE_LOGGING
# Globals written: net_id, network_name, network_attribute, network_state,
#                  formatted_network_name, PRINT_IP_ADDRESS
#
###############################################################################
print_network_info()
{
    [[ "$VERBOSE_LOGGING" = "high" ]] && set -x

    # Get network IDs, de-duplicated and in numeric order.
    network_ids=$(printf '%s\n' "$NETWORK_MIB_FUNC" |
        grep -w "$NETWORK_ID.$node_id" | cut -f3 -d" " | sort -n -u)

    # Get states for these networks on this node
    for net_id in $network_ids
    do
        printf "\n"
        network_name=$(printf '%s\n' "$NETWORK_MIB_FUNC" |
            grep -w "$NETWORK_NAME.$node_id.$net_id" | cut -f2 -d\")
        network_attribute=$(printf '%s\n' "$NETWORK_MIB_FUNC" |
            grep -w "$NETWORK_ATTRIBUTE.$node_id.$net_id" | cut -f3 -d" ")
        network_state=$(printf '%s\n' "$NETWORK_MIB_FUNC" |
            grep -w "$NETWORK_STATE.$node_id.$net_id" | cut -f3 -d" ")
        formatted_network_name=$(printf '%s\n' "$network_name" | awk '{printf "%-18s", $1}')

        # The name is passed as an argument, never as part of the format.
        printf " Network : %s State: " "$formatted_network_name"
        case $network_state in
            2)  printf "UP\n" ;;
            4)  printf "DOWN\n" ;;
            32) printf "JOINING\n" ;;
            64) printf "LEAVING\n" ;;
            *)  printf "N/A\n" ;;
        esac

        PRINT_IP_ADDRESS="true"
        # If serial type network, then don't attempt to print IP Address
        [[ "$network_attribute" -eq 4 ]] && PRINT_IP_ADDRESS="false"
        print_address_info
    done
}
###############################################################################
#
# Name: print_node_info
#
# Prints the node information for each node found in the MIB
#
# Scans node ids upwards from 0 until $cluster_num_nodes nodes have been
# printed.  For each node it prints the name and state, optionally the
# network section (NETWORK = "TRUE"), and then the resource-group section.
#
# Globals read:    NODE_MIB, NETWORK_MIB, ADDRESS_MIB, NODE_ID, NODE_STATE,
#                  NODE_NUM_IF, NODE_NAME, NETWORK_BRANCH, ADDRESS_BRANCH,
#                  INTERNAL, NETWORK, VERBOSE_LOGGING,
#                  MAX_NODE_ID (optional, default 1024)
# Globals written: node_id, node_state, node_num_if, node_name,
#                  NETWORK_MIB_FUNC, ADDRESS_MIB_FUNC, cluster_num_nodes
#                  (counted down), NODE_ID_COUNTER
#
###############################################################################
print_node_info()
{
    [[ "$VERBOSE_LOGGING" = "high" ]] && set -x

    NODE_ID_COUNTER=0
    while [[ "$cluster_num_nodes" -ne 0 ]]
    do
        # Get node information for each node
        node_id=$(printf '%s\n' "$NODE_MIB" |
            grep -w "$NODE_ID.$NODE_ID_COUNTER" | cut -f3 -d " ")
        NODE_ID_COUNTER=$((NODE_ID_COUNTER + 1))

        # Node ids may not be contiguous
        if [[ -z "$node_id" ]]
        then
            # If the MIB holds fewer node rows than cluster_num_nodes claims,
            # the countdown never reaches 0; stop scanning at a fixed ceiling
            # instead of spinning forever.
            if [[ "$NODE_ID_COUNTER" -gt "${MAX_NODE_ID:-1024}" ]]
            then
                break
            fi
            continue
        fi

        node_state=$(printf '%s\n' "$NODE_MIB" |
            grep -w "$NODE_STATE.$node_id" | cut -f3 -d" ")
        node_num_if=$(printf '%s\n' "$NODE_MIB" |
            grep -w "$NODE_NUM_IF.$node_id" | cut -f3 -d" ")
        node_name=$(printf '%s\n' "$NODE_MIB" |
            grep -w "$NODE_NAME.$node_id" | cut -f2 -d\")
        formatted_node_name=$(printf '%s\n' "$node_name" | awk '{printf "%-15s", $1}')

        echo ""
        printf "Node : %s State: " "$formatted_node_name"

        # -i: ask the node itself (over ssh, as root) for the cluster
        # manager's internal state and show it in parentheses.
        if [[ "$INTERNAL" -eq 1 ]]
        then
            internal_state=$(ssh "root@$node_name" lssrc -ls clstrmgrES |
                grep -i state | awk '{print $3}')
            finternal_state="($internal_state)"
        fi

        # Unrecognised state codes print nothing after "State: ".
        case $node_state in
            2)  printf 'UP %s\n' "$finternal_state" ;;
            4)  printf 'DOWN %s\n' "$finternal_state" ;;
            32) printf 'JOINING %s\n' "$finternal_state" ;;
            64) printf 'LEAVING %s\n' "$finternal_state" ;;
        esac

        # Narrow the network/address tables down to this node's rows for the
        # functions called below.
        NETWORK_MIB_FUNC=$(printf '%s\n' "$NETWORK_MIB" |
            grep -w "$NETWORK_BRANCH\..\.$node_id")
        ADDRESS_MIB_FUNC=$(printf '%s\n' "$ADDRESS_MIB" |
            grep -w "$ADDRESS_BRANCH\..\.$node_id")

        if [ "$NETWORK" = "TRUE" ]
        then
            print_network_info
        fi
        print_rg_info

        cluster_num_nodes=$((cluster_num_nodes - 1))
    done
}
###############################################################################
#
# Name: print_cluster_info
#
# Prints the cluster information for the cluster found in the MIB of which
# this node is a member.
#
# Arguments:       $1 - host that was queried; shown after "qn:"
# Globals read:    CLUSTER_MIB, CLUSTER_NAME, CLUSTER_STATE, CLUSTER_SUBSTATE,
#                  CLUSTER_NUM_NODES
# Globals written: HANODE, cluster_name, cluster_state, cluster_substate,
#                  cs, css, cluster_num_nodes
# Output:          the status header, followed by the output of
#                  print_node_info
#
###############################################################################
print_cluster_info ()
{
    HANODE=$1

    cluster_name=$(printf '%s\n' "$CLUSTER_MIB" |
        grep -w "$CLUSTER_NAME\.0" | cut -f2 -d\")
    cluster_state=$(printf '%s\n' "$CLUSTER_MIB" |
        grep -w "$CLUSTER_STATE\.0" | cut -f3 -d" ")
    cluster_substate=$(printf '%s\n' "$CLUSTER_MIB" |
        grep -w "$CLUSTER_SUBSTATE\.0" | cut -f3 -d" ")

    # The default arms keep a value left over from a previous report from
    # being shown when the current code is missing or not recognised.
    case $cluster_state in
        2) cs="UP" ;;
        4) cs="DOWN" ;;
        *) cs="UNKNOWN" ;;
    esac

    case $cluster_substate in
        4)      css="DOWN" ;;
        8)      css="UNKNOWN" ;;
        16)     css="UNSTABLE" ;;
        2 | 32) css="STABLE" ;;
        64)     css="ERROR" ;;
        128)    css="RECONFIG" ;;
        *)      css="UNKNOWN" ;;
    esac

    printf 'Status for %s on %s\n' "$cluster_name" "$(date '+%d %b %y %T')"
    printf 'Cluster is (%s & %s)\nqn: %s\n' "$cs" "$css" "$HANODE"

    cluster_num_nodes=$(printf '%s\n' "$CLUSTER_MIB" |
        grep -w "$CLUSTER_NUM_NODES\.0" | cut -f3 -d" ")
    print_node_info
}
# Query one host over SNMP and publish the result.
#
# Arguments:       $1 - host name or address to query
# Globals read:    CLUSTER_BRANCH, STOP, COMMUNITY (defaults to "public")
# Globals written: LOGFILE, HTMLFILE, CGIFILE, STATFILE, COMMUNITY,
#                  CLUSTER_MIB, NODE_MIB, NETWORK_MIB, ADDRESS_MIB, SUCCESS
#
# On success (the walk returned cluster data) the report is written to
# $LOGFILE, echoed to stdout and copied to $HTMLFILE; SUCCESS is set to 1 and,
# if STOP is 1 (one-shot mode), the script exits at that point.
# On failure a "Data unavailable" message goes to stdout and $HTMLFILE,
# SUCCESS is set to 0 and the clsmon service is asked to reload this script.
# Unless the script exited, format_cgi is then called to regenerate the CGI.
get_node ()
{
    LOGFILE="/tmp/$1.qhaslog"
    HTMLFILE="/tmp/$1.qhashtml"
    CGIFILE="/home/clsmon/www/clsmon/clsmon-cgi/$(basename "${0}" | cut -d "." -f 1).cgi"
    STATFILE="/tmp/$1.aastat"
    # Keep a community string configured earlier; fall back to "public".
    COMMUNITY="${COMMUNITY:-public}"

    # get the snmp info
    CLUSTER_MIB=$(snmpwalk -r 2 -t 120 -c "$COMMUNITY" -O faUnQ -v 1 "$1" 1.3.6.1.4.1.2.3.1.2.1.5 2> /dev/null)
    # Disabled alternative that walks the sub-branches one at a time:
    # CLUSTER_MIB=`snmpwalk -c $COMMUNITY -O faUnQ -v 1 $1 1.3.6.1.4.1.2.3.1.2.1.5.1;
    #   snmpwalk -c $COMMUNITY -O faUnQ -v 1 $1 1.3.6.1.4.1.2.3.1.2.1.5.4.1.1;
    #   snmpwalk -c $COMMUNITY -O faUnQ -v 1 $1 1.3.6.1.4.1.2.3.1.2.1.5.2.1.1;
    #   snmpwalk -c $COMMUNITY -O faUnQ -v 1 $1 1.3.6.1.4.1.2.3.1.2.1.5.3 2> /dev/null`

    # is there any snmp info?
    if printf '%s\n' "$CLUSTER_MIB" | grep -qF -- "$CLUSTER_BRANCH"
    then
        # One walk feeds all three per-table views.
        NODE_MIB=$CLUSTER_MIB
        NETWORK_MIB=$CLUSTER_MIB
        ADDRESS_MIB=$CLUSTER_MIB

        # Print Topology Information
        SUCCESS=1
        print_cluster_info "$1" > "$LOGFILE"
        cat "$LOGFILE"
        cp "$LOGFILE" "$HTMLFILE"
        if [ "$STOP" -eq 1 ]; then exit; fi
    else
        SUCCESS=0
        printf 'Data unavailable on NODE: %s\n%s\nCheck cluster node state\n' \
            "$1" "$(date '+%d %b %y %T')" | tee "$HTMLFILE"
        service clsmon reload-script "$(basename "$0")" &
    fi
    format_cgi
}
# Poll the hosts listed in NODES, in order.
#
# A host is queried again and again for as long as get_node leaves SUCCESS at
# 1; when the host stops answering ping, or get_node sets SUCCESS to 0, the
# next host in the list is tried.  Returns once every host has failed.
#
# Globals read:    NODES, HTMLFILE (as left by the last get_node call, if any)
# Globals written: NODE, SUCCESS
main ()
{
    for NODE in $NODES
    do
        SUCCESS=1
        while [ "$SUCCESS" -eq 1 ]
        do
            if ping -w 3 -c1 "$NODE" > /dev/null 2>&1
            then
                get_node "$NODE"
            else
                SUCCESS=0
                # HTMLFILE is only set once get_node has run; when it is
                # still unset the message goes to stdout alone.
                echo "NODE: $NODE not responding" | tee ${HTMLFILE:+"$HTMLFILE"}
            fi
        done
    done
}
# sort the flags
#   -n  omit the network section   (NETWORK="FALSE")
#   -1  one report instead of loop (STOP=1)
#   -i  show internal state        (INTERNAL=1)
NETWORK="TRUE"
STOP=0
while getopts :n1i ARGs
do
    case $ARGs in
        n) NETWORK="FALSE" ;;
        1) STOP=1 ;;
        i) INTERNAL=1 ;;
        # usage prints the synopsis and exits with status 1.
        \?) printf "\nNot a valid option\n\n" ; usage ;;
    esac
done
###############################################################################
# Main
###############################################################################
while true
do
    main
    # In one-shot mode get_node exits the script as soon as a report has been
    # produced, so getting here means no host delivered one.  Stop with a
    # failure status instead of polling forever.
    if [ "$STOP" -eq 1 ]
    then
        exit 1
    fi
done
exit 0