Professional Documents
Culture Documents
useradd
#######################################
# Exadata Cell Service Startup Time
#######################################
# CELLSRVC Cell Server process
dcli -l root -g ~/cell_group "ps -ef|grep 'cellsrv 100' |grep -v grep"
#############################
# Exadata Hardware Checks
#############################
#-- FlashDisks
# Each query is sent to every host listed in cell_group via dcli. The
# backslash-newlines sit inside the outer double quotes, so the shell removes
# them and cellcli receives a single-line statement with intact attribute names.

# Flash physical disks whose status is not 'normal'.
dcli -l root -g cell_group " cellcli -e \"LIST PHYSICALDISK ATTRIBUTES \
name,status,diskType,physicalSize,errorcount,physicalInsertTime,lastFailureReason \
where disktype=flashdisk AND status != 'normal' \" "

# Flash LUNs whose status is not 'normal'.
dcli -l root -g cell_group " cellcli -e \"LIST LUN ATTRIBUTES \
name,status,diskType,deviceName,raidLevel,isSystemLun,lunSize,physicalDrives,\
lunWriteCacheMode,errorCount \
where disktype=flashdisk AND status != 'normal' \" "

# Flash LUNs whose physicalDrives attribute is empty.
dcli -l root -g cell_group " cellcli -e \"LIST lun ATTRIBUTES \
name,status,diskType,deviceName,raidLevel,isSystemLun,lunSize,physicalDrives,\
lunWriteCacheMode,errorCount \
where disktype=flashdisk and physicalDrives='' \" "
#-- FlashCache
dcli -l root -g cell_group " cellcli -e 'LIST flashcache attributes
name,status,size,creationTime,degradedCelldisks,effectiveCacheSize ' "
#-- FlashLog
dcli -l root -g cell_group " cellcli -e 'LIST flashlog attributes
name,status,size,creationTime,degradedCelldisks,effectiveSize,efficiency ' "
#-- Check current Battery Charge Capacity (Should be > 800 mAh)
# The whole MegaCli64 invocation must stay on one line inside the single
# quotes; a newline there would make the remote shell run the options as a
# separate command. The grep runs remotely on each host in all_group.
dcli -l root -g all_group '/opt/MegaRAID/MegaCli/MegaCli64 -AdpBbuCmd -GetBbuCapacityInfo -a0 | grep "Full Charge" '
#-- FlashCache
dcli -l root -g cell_group " cellcli -e \"LIST GRIDDISK ATTRIBUTES
name,cachingPolicy where name like '.*RECO.*' \" "
dcli -l root -g cell_group " cellcli -e \"LIST GRIDDISK ATTRIBUTES
name,cachingPolicy where cachingPolicy != 'default' \" "
dcli -l root -g cell_group " cellcli -e \"LIST METRICCURRENT WHERE name =
'GD_BY_FC_DIRTY' and metricObjectName like '.*RECO.*' \" "
dcli -l root -g cell_group " cellcli -e \"LIST METRICCURRENT WHERE name =
'GD_IO_BY_R_LG' and metricObjectName like '.*RECO.*' \" "
dcli -l root -g cell_group " cellcli -e \"LIST METRICCURRENT WHERE name =
'GD_IO_BY_R_SM' and metricObjectName like '.*RECO.*' \" "
#############################
# Exadata Version Checks
#############################
#############################
# Exadata Patching Checks
#############################
# Alert Log
#####################
# Page through the local instance's alert log. The directory under diag/rdbms
# is taken from the first 8 bytes of ORACLE_SID (author's convention; adjust
# if the database unique name differs).
less "$ORACLE_BASE/diag/rdbms/$(echo "$ORACLE_SID" | cut -b 1-8)/$ORACLE_SID/trace/alert_$ORACLE_SID.log"

# On every host in dbs_group: take alert-log entries matching the date pattern
# (with 10 lines of context), keep timestamp / ORA-00600 / trace-file lines,
# then show each ORA- line with the line before it, dropping grep's "--"
# group separators. The command is single-quoted so that the variables and the
# backtick substitution are expanded on the remote hosts, not locally.
dcli -l oracle -g dbs_group 'egrep -A10 "Aug 01.*2012" $ORACLE_BASE/diag/rdbms/`echo $ORACLE_SID|cut -b 1-8`/$ORACLE_SID/trace/alert_$ORACLE_SID.log |egrep "2012|ORA-00600|trc" | grep -B1 ORA- |egrep -v "^--" '
########################
# Cell Details
########################
# List status and version attributes of every cell in cell_group.
# Backslash-newlines inside the outer double quotes are removed by the shell,
# so the attribute list reaches cellcli as one unbroken comma-separated list.
dcli -l root -g cell_group " cellcli -e ' LIST CELL ATTRIBUTES \
name,cellNumber,status,flashCacheMode,flashCacheCompress,fanStatus,powerStatus,\
temperatureStatus,cellsrvStatus,msStatus,rsStatus,releaseVersion,releaseTrackingBug ' "
# FlashCache Compression
##########################
# GridDisk Details
##################
cellcli -e ' DESCRIBE griddisk '
# FlashDisk Details
###################
# FlashCache Contents
#######################
dcli -l root -g cell_group " cellcli -e ' LIST FLASHCACHECONTENT ATTRIBUTES
dbUniqueName,dbID,objectNumber,tableSpaceNumber,cachedSize,cachedKeepSize,hitCount,
missCount,hoursToExpiration ' "
# Reset FlashCache
alter cell events = "immediate cellsrv.cellsrv_flashcache(Reset,0,0,0)"
# How to find which cell node a SQL or session is running on and what it is doing?
#######################################################################################
cellcli -e ' LIST ACTIVEREQUEST where dbName = mydb and instanceNumber = 1 '
cellcli -e ' LIST ACTIVEREQUEST where sessionID=1271 and sessionSerNumber=31026
detail '
##########################
# Serial Numbers
##########################
#-- Exadata Rack serial number
/usr/bin/ipmitool sunoem cli "show /SP system_identifier"
#-- Exadata Compute/Cell node - serial number
/usr/bin/ipmitool sunoem cli "show /SYS product_serial_number"
#-- Rack level and Individual Serial Numbers
/opt/oracle.SupportTools/CheckHwnFWProfile -S
#################################################################
# IPMI Tool (ILOM) Commands
#################################################################
###############################
#-- ILOM (SP - Service Processor) Commands
###############################
/usr/bin/ipmitool sunoem cli "reset -script /SP"
#-- If still unable to restart ILOM, try SSH to ILOM and issue the following.
reset -script /SP
###############################
# ILOM Server Commands
###############################
/usr/bin/ipmitool sunoem cli "reset -script /SYS"
/usr/bin/ipmitool sunoem cli "stop -script /SYS"
/usr/bin/ipmitool sunoem cli "start -script /SYS"
#-- Server Power (chassis power Commands: status, on, off, cycle, reset, diag, soft)
/usr/bin/ipmitool power status
/usr/bin/ipmitool power on
/usr/bin/ipmitool power soft
/usr/bin/ipmitool power off
/usr/bin/ipmitool power cycle
/usr/bin/ipmitool power reset
/usr/bin/ipmitool power diag
###############################
# ILOM Console
###############################
#-- Start Console
start -script /SP/Console
#
/usr/bin/ipmitool sunoem cli " show -level all /SP/faultmgmt"
# Show as table
/usr/bin/ipmitool sunoem cli "show -o table -level all /SP/faultmgmt"
##########################
# Boot Order
##########################
##################################
# Disk Controller Commands
##################################
#-- Help
/opt/MegaRAID/MegaCli/MegaCli64 -help
################################
#-- Check Battery Learn Cycle
###############################
# RAID controller battery (BBU) checks across every host in all_group.
# Where the grep is inside the single quotes it runs on each remote host;
# where it follows the closing quote it runs locally on dcli's combined output.
dcli -l root -g all_group '/opt/MegaRAID/MegaCli/MegaCli64 -AdpBbuCmd -a0 | grep "Auto-Learn Mode" '
dcli -l root -g all_group '/opt/MegaRAID/MegaCli/MegaCli64 -AdpBbuCmd -GetBbuProperties -a0' | grep 'Auto Learn Period'
dcli -l root -g all_group '/opt/MegaRAID/MegaCli/MegaCli64 -AdpBbuCmd -a0 | grep "Learn Cycle Active" '
dcli -l root -g all_group '/opt/MegaRAID/MegaCli/MegaCli64 -AdpBbuCmd -a0 | grep "Learn Cycle Requested" '
dcli -l root -g all_group '/opt/MegaRAID/MegaCli/MegaCli64 -AdpBbuCmd -a0 | grep "Learn Cycle Status" '
#-- Check auto learn mode, Should be enabled on compute node and disabled on cell node.
dcli -l root -g all_group '/opt/MegaRAID/MegaCli/MegaCli64 -AdpBbuCmd -GetBbuProperties -a0' | grep 'Auto-Learn Mode'
dcli -l root -g all_group '/opt/MegaRAID/MegaCli/MegaCli64 -AdpBbuCmd -GetBbuProperties -a0' | grep 'Auto Learn Period'
#-- Check current Battery Charge Capacity (Should be > 800 mAh)
dcli -l root -g all_group '/opt/MegaRAID/MegaCli/MegaCli64 -AdpBbuCmd -GetBbuCapacityInfo -a0 | grep "Full Charge" '
##########################
# Recreate the Cell
##########################
##########################
# Exadata Cell Metrics
##########################
# Metric definition
cellcli -e describe METRICDEFINITION
cellcli -e "list METRICDEFINITION attributes
name,metricType,objectType,persistencePolicy,unit,description where name like
'FC.*' "
cellcli -e "list METRICDEFINITION attributes
name,metricType,objectType,persistencePolicy,unit,description where name like
'DB.*' "
cellcli -e "list METRICDEFINITION attributes
name,metricType,objectType,persistencePolicy,unit,description where description
like '.*flash.*' "
cellcli -e "list METRICDEFINITION attributes
name,metricType,objectType,persistencePolicy,unit,description where description
like '.*read.*' "
cellcli -e "list METRICDEFINITION attributes
name,metricType,objectType,persistencePolicy,unit,description where description
like '.*write.*per.*sec.*' "
#---------------------------
# Smart IO Metrics
#---------------------------
cellcli -e "list METRICDEFINITION attributes
name,metricType,objectType,persistencePolicy,unit,description where name like
'SIO.*' "
# Current values of the smart I/O (SIO_*) metrics on every cell in cell_group.
# Descriptions are placed above each command so that no part of a comment can
# end up on a line of its own and be executed.

# The cumulative number of megabytes eligible for offload by smart I/O.
dcli -l root -g cell_group "cellcli -e list metriccurrent where name = 'SIO_IO_EL_OF' "
# The cumulative number of interconnect megabytes returned by smart I/O.
dcli -l root -g cell_group "cellcli -e list metriccurrent where name = 'SIO_IO_OF_RE' "
dcli -l root -g cell_group "cellcli -e list metriccurrent where name = 'SIO_IO_EL_OF_SEC' "
dcli -l root -g cell_group "cellcli -e list metriccurrent where name = 'SIO_IO_OF_RE_SEC' "
# Number of megabytes per second read from flash cache by smart IO
dcli -l root -g cell_group "cellcli -e list metriccurrent where name = 'SIO_IO_RD_FC_SEC' "
# Number of megabytes per second read from hard disk by smart IO
dcli -l root -g cell_group "cellcli -e list metriccurrent where name = 'SIO_IO_RD_HD_SEC' "
# Number of megabytes per second saved by storage index
dcli -l root -g cell_group "cellcli -e list metriccurrent where name = 'SIO_IO_SI_SV_SEC' "
#---------------------------
# FlashCache Metrics
#---------------------------
cellcli -e "list METRICDEFINITION attributes
name,metricType,objectType,persistencePolicy,unit,description where name like
'.*FC.*' "
cellcli -e "list METRICDEFINITION attributes
name,metricType,objectType,persistencePolicy,unit,description where description
like '.*flash.*' "
cellcli -e "list METRICDEFINITION attributes
name,metricType,objectType,persistencePolicy,unit,description where name like
'.*FC.*' and description like '.*read.*' "
cellcli -e "list METRICDEFINITION attributes
name,metricType,objectType,persistencePolicy,unit,description where name like
'.*FC.*' and description like '.*writ.*' "
#---------------------------
# Database IO per second
#---------------------------
# Current Metric
cellcli -e "describe metriccurrent"
######################
#-- Metric History
######################
cellcli -e "describe metrichistory"
#-------------------
# Last 10 minutes
#-------------------
cellcli -e "list metrichistory where name = 'DB_IO_BY_SEC' and collectionTime >
'`date --date=\" 10 min ago \" +%Y-%m-%dT%H:%M:%S%z`' "
#----------------------------
# Last 1 hour for a database
#----------------------------
# DB_IO_BY_SEC history for the last hour with selected attributes.
# The lower time bound is computed by GNU date at the moment the command is
# run; backslash-newlines inside the double quotes are dropped by the shell,
# so cellcli sees one line and the attribute names stay intact.
cellcli -e "list metrichistory attributes \
collectionTime,metricObjectName,name,metricValue,metricValueAvg,metricValueMax,\
metricValueMin where name = 'DB_IO_BY_SEC' \
and collectionTime > '$(date --date=" 1 hour ago " +%Y-%m-%dT%H:%M:%S%z)' "
#----------------------------
# Specific Time Window
#----------------------------
cellcli -e "list metrichistory where name = 'DB_IO_BY_SEC' and metricObjectName
= 'STBY52_PR01PIMI' and collectionTime > '2015-04-16T14:00:00-08:00' and
collectionTime < '2015-04-16T18:00:00-08:00' "
##########################################
# Storage Server Metric By database
###########################################
DB_FC_IO_BY_SEC The number of megabytes of I/O per second for this database
to flash cache.
DB_FC_IO_RQ The number of I/O requests issued by a database to flash
cache.
DB_FC_IO_RQ_SEC The number of I/O requests issued by a database to flash
cache per second.
DB_FD_IO_BY_SEC The number of megabytes of I/O per second for this database
to flash disks.
DB_FD_IO_LOAD The average I/O load from this database for flash disks.
DB_FD_IO_RQ_LG The number of large I/O requests issued by a database to
flash disks.
DB_FD_IO_RQ_LG_SEC The number of large I/O requests issued by a database to
flash disks per second.
DB_FD_IO_RQ_SM The number of small I/O requests issued by a database to
flash disks.
DB_FD_IO_RQ_SM_SEC The number of small I/O requests issued by a database to
flash disks per second.
DB_FD_IO_TM The cumulative latency of reading blocks by a database from
flash disks.
DB_FD_IO_TM_RQ The rate which is the average latency of reading blocks per
request by a database from flash disks.
DB_FD_IO_UTIL The percentage of flash resources utilized from this
database.
DB_IO_BY_SEC The number of megabytes of I/O per second for this database
to hard disks.
DB_IO_LOAD The average I/O load from this database for hard disks.
DB_IO_RQ_LG The cumulative number of large I/O requests issued by the
database. A large value indicates a heavy large I/O workload from this database.
DB_IO_RQ_LG_SEC The rate of large I/O requests issued by a consumer group
per second over the past minute. A large value indicates a heavy large I/O workload
from this database in the past minute.
DB_IO_RQ_SM The cumulative number of small I/O requests issued by the
database. A large value indicates a heavy small I/O workload from this database.
DB_IO_RQ_SM_SEC The rate of small I/O requests issued by a consumer group
per second over the past minute. A large value indicates a heavy small I/O workload
issued by this database in the past minute.
DB_IO_TM_LG The cumulative latency of reading large blocks by a database
from hard disks.
DB_IO_TM_LG_RQ The rate which is the average latency of reading large
blocks per request by a database from hard disks.
DB_IO_TM_SM The cumulative latency of reading small blocks by a database
from hard disks.
DB_IO_TM_SM_RQ The rate which is the average latency of reading small
blocks per request by a database from hard disks.
DB_IO_UTIL_LG The percentage of disk resources utilized by large requests
from this database.
DB_IO_UTIL_SM The percentage of disk resources utilized by small requests
from this database.
DB_IO_WT_LG The cumulative number of milliseconds that large I/O
requests issued by the database have waited to be scheduled by IORM. A large value
indicates that the I/O workload from this database is exceeding the allocation
specified for it in the interdatabase plan.
DB_IO_WT_LG_RQ The average number of milliseconds that large I/O requests
issued by the database have waited to be scheduled by IORM in the past minute. A
large value indicates that the I/O workload from this database is exceeding the
allocation specified for it in the interdatabase plan.
DB_IO_WT_SM The cumulative number of milliseconds that small I/O
requests issued by the database have waited to be scheduled by IORM. A large value
indicates that the I/O workload from this database is exceeding the allocation
specified for it in the interdatabase plan.
DB_IO_WT_SM_RQ The average number of milliseconds that small I/O requests
issued by the database have waited to be scheduled by IORM in the past minute. A
large value indicates that the I/O workload from this database is exceeding the
allocation specified for it in the interdatabase plan.
###############################################
# Tabular IO per database (MB/sec) on a cell
################################################
#fromtime="`date --date=\" 30 min ago \" +%Y-%m-%dT%H:%M:%S%z`"
#totime="`date +%Y-%m-%dT%H:%M:%S%z`"
fromtime='2016-02-16T07:30:00-08:00'
totime='2016-02-16T10:30:00-08:00'
# DB_IO_BY_SEC
# DB_IO_RQ_SM_SEC
# DB_IO_RQ_LG_SEC
# DB_IO_UTIL_SM
# DB_IO_UTIL_LG
# DB_FD_IO_RQ_SM_SEC
# DB_FD_IO_RQ_LG_SEC
##########################################
# Tabular database IO on a cell
# (Multiple metrics for a database)
###########################################
##########################
# collectl Commands
##########################
collectl -scdmnfx -o T -i 2
##########################
# Colmux Commands
##########################
colmux -command -scdmnf -reverse -column 0 -addr <comma-separated-server-LIST or
filename>
#---------------------------------------------
# Collectl top or Colmux top
# Show cluster wide top (order by CPU Pct)
#---------------------------------------------
colmux -command "-sZ -i:2 " -column 12 -lines 50 -port 64123 -addr dbs_group
colmux -command "-sZ -i:2 --procfilt=fora_smon " -column 12 -lines 50 -port 64123
-addr dbs_group
colmux -command "-sZ -i:2 --procfilt=fbt01pimi " -column 12 -lines 50 -port 64123
-addr dbs_group_bt01pimi
#-------------------------
# Hard Disk Statistics
#-------------------------
sudo colmux -command '-sD ' -lines 100 -reverse -column 1 -port 64002 -addr
dm02cel01
##########################
# Colplot commands
##########################
colplot -dir /var/log/collectl/consolidated -contains dm01db01 -date "20120909"
-time "00:00-24:00" -plots cpu,mem,disk,net,nfssum,ib,paging,swap,inode,proc,tcp
-filetype pdf -email emailaddress -subject "DM01 Plots"
colplot -dir /var/log/collectl/consolidated -contains dm01db01 -date "20120909-
20120910" -time "00:00-24:00" -plots
cpu,mem,disk,net,nfssum,ib,paging,swap,inode,proc,tcp -filetype pdf -email
vishal@vishalgupta.com -subject "DM01 Plots"
##########################
# Install Collectl
##########################
#-- ColPlot requires httpd to be installed on the server.
#-- The following RPMs are needed for colplot to work
- httpd
- mailcap (For emails)
- sharutils (For /usr/bin/uuencode for emails)
- gnuplot
- ghostscript (For PDFs)
- cups-libs
- cairo
- libtiff
- urw-fonts
- chkfontpath
- xorg-x11-font-utils
- ghostscript-fonts (For PDFs)
##########################
# Setup Colplot
##########################
#-- Add "-P -oz" to following line in /etc/collectl.conf
DaemonCommands = -f /var/log/collectl -r00:00,7 -m -F60 -s+YZ -P -oz
###########################################
# Collectl Changes/Customizations
###########################################
- /etc/collectl.conf
- Gather configuration to store data in plottable format
- /usr/bin/colplot
- Change colplot home page cgi script to stop it from trying to find earlier and
latest file.
- /etc/colplot.conf
- Change default PlotDir
PlotDir = /export/ora_stage/colplot
- /usr/share/collectl/colplotlib.defs
##########################
# Disable Collectl
##########################
dcli -l root -g ~/all_group service collectl stop
dcli -l root -g ~/all_group chkconfig collectl off
dcli -l root -g ~/all_group chkconfig --list collectl
##########################
# Enable Collectl
##########################
dcli -l root -g ~/all_group service collectl start
dcli -l root -g ~/all_group chkconfig collectl on
dcli -l root -g ~/all_group chkconfig --list collectl
##########################
# De-Install Collectl
##########################
dcli -l root -g ~/all_group service collectl stop
dcli -l root -g ~/all_group rpm -e collectl collectl-utils
#####################################
# Install collectl on all nodes
######################################
dcli -l root -g all_group cp -p /etc/collectl.conf /tmp/
#############################################
# Install collectl-utils on all nodes
############################################
dcli -l root -g dbs_group cp -p /etc/colplot.conf /tmp/
dcli -l root -g all_group '/usr/bin/collectl -v |head -1'
# Copy the collectl 3.7.4 source tarball from this host to /var/tmp/ on every
# host in all_group (file name must be one unbroken token).
dcli -l root -g all_group -f /export/ora_stage/vishal/collectl/collectl-3.7.4.src.tar.gz -d /var/tmp/
# Unpack it remotely; '&&' stops the extraction if the cd fails, and tar's
# verbose listing is discarded.
dcli -l root -g all_group "cd /var/tmp/ && gunzip -c collectl-3.7.4.src.tar.gz | tar xvf - > /dev/null"
dcli -l root -g all_group service collectl status
dcli -l root -g all_group service collectl stop
dcli -l root -g all_group chkconfig collectl off
dcli -l root -g all_group chkconfig --list collectl
dcli -l root -g all_group rpm -e collectl
dcli -l root -g all_group rpm -q collectl
dcli -l root -g all_group "cd /var/tmp/collectl-3.7.4; sh INSTALL"
dcli -l root -g all_group '/usr/bin/collectl -v |head -1'
dcli -l root -g all_group -f /tmp/collectl.conf -d /tmp/
dcli -l root -g all_group cp -p /tmp/collectl.conf /etc/
dcli -l root -g all_group service collectl start
dcli -l root -g all_group service collectl status
dcli -l root -g all_group chkconfig collectl on
dcli -l root -g all_group chkconfig --list collectl
dcli -l root -g all_group rm /var/tmp/collectl-3.7.4.src.tar.gz
dcli -l root -g all_group rm -fR /var/tmp/collectl-3.7.4
dcli -l root -g all_group rm -f /etc/collectl.conf.rpmsave
#####################################
# Upgrade collectl to 3.6.5-2
######################################
dcli -l root -g all_group rpm -q collectl
dcli -l root -g all_group cp -p /etc/collectl.conf /tmp/
# Copy the collectl 3.6.5-2 RPM from this host to /var/tmp/ on every host in
# all_group; the file name must be a single token for the later rpm -Uvh to find it.
dcli -l root -g all_group -f /export/ora_stage/vishal/collectl/collectl-3.6.5-2.noarch.rpm -d /var/tmp/
dcli -l root -g all_group service collectl stop
dcli -l root -g all_group rpm -Uvh /var/tmp/collectl-3.6.5-2.noarch.rpm
dcli -l root -g all_group rpm -q collectl
dcli -l root -g all_group cp -p /tmp/collectl.conf /etc/
dcli -l root -g all_group service collectl start
dcli -l root -g all_group service collectl status
dcli -l root -g all_group chkconfig collectl on
dcli -l root -g all_group chkconfig --list collectl
dcli -l root -g all_group rm /var/tmp/collectl-3.6.5-2.noarch.rpm
dcli -l root -g all_group rm -f /etc/collectl.conf.rpmsave
#####################################
# Downgrade collectl to 3.6.5-2
######################################
dcli -l root -g all_group cp -p /etc/collectl.conf /tmp/
# Copy the collectl 3.6.5-2 RPM from this host to /var/tmp/ on every host in
# all_group; the file name must be a single token for the later rpm -Uvh to find it.
dcli -l root -g all_group -f /export/ora_stage/vishal/collectl/collectl-3.6.5-2.noarch.rpm -d /var/tmp/
dcli -l root -g all_group rpm -e collectl
dcli -l root -g all_group rpm -Uvh /var/tmp/collectl-3.6.5-2.noarch.rpm
dcli -l root -g all_group rm /var/tmp/collectl-3.6.5-2.noarch.rpm
dcli -l root -g all_group cp -p /tmp/collectl.conf /etc/
dcli -l root -g all_group service collectl start
dcli -l root -g all_group service collectl status
dcli -l root -g all_group chkconfig collectl on
dcli -l root -g all_group chkconfig --list collectl
dcli -l root -g all_group rm -f /etc/collectl.conf.rpmsave
#####################################
# Upgrade collectl to 3.6.9-1
######################################
dcli -l root -g all_group rpm -q collectl
dcli -l root -g all_group cp -p /etc/collectl.conf /tmp/
# Copy the collectl 3.6.9-1 RPM from this host to /var/tmp/ on every host in
# all_group; the file name must be a single token for the later rpm -Uvh to find it.
dcli -l root -g all_group -f /export/ora_stage/vishal/collectl/collectl-3.6.9-1.noarch.rpm -d /var/tmp/
dcli -l root -g all_group service collectl stop
dcli -l root -g all_group rpm -Uvh /var/tmp/collectl-3.6.9-1.noarch.rpm
dcli -l root -g all_group rpm -q collectl
dcli -l root -g all_group cp -p /tmp/collectl.conf /etc/
dcli -l root -g all_group service collectl start
dcli -l root -g all_group service collectl status
dcli -l root -g all_group chkconfig collectl on
dcli -l root -g all_group chkconfig --list collectl
dcli -l root -g all_group rm /var/tmp/collectl-3.6.9-1.noarch.rpm
dcli -l root -g all_group rm -f /etc/collectl.conf.rpmsave
#####################################
# Upgrade collectl to 3.7.4-1
######################################
dcli -l root -g all_group cp -p /etc/collectl.conf /tmp/
dcli -l root -g all_group '/usr/bin/collectl -v |head -1'
# Copy the collectl 3.7.4 source tarball from this host to /var/tmp/ on every
# host in all_group (file name must be one unbroken token).
dcli -l root -g all_group -f /export/ora_stage/vishal/collectl/collectl-3.7.4.src.tar.gz -d /var/tmp/
# Unpack it remotely; '&&' stops the extraction if the cd fails, and tar's
# verbose listing is discarded.
dcli -l root -g all_group "cd /var/tmp/ && gunzip -c collectl-3.7.4.src.tar.gz | tar xvf - > /dev/null"
dcli -l root -g all_group service collectl status
dcli -l root -g all_group service collectl stop
dcli -l root -g all_group chkconfig collectl off
dcli -l root -g all_group chkconfig --list collectl
dcli -l root -g all_group rpm -e collectl
dcli -l root -g all_group rpm -q collectl
dcli -l root -g all_group "cd /var/tmp/collectl-3.7.4; sh INSTALL"
dcli -l root -g all_group '/usr/bin/collectl -v |head -1'
dcli -l root -g all_group cp -p /tmp/collectl.conf /etc/
dcli -l root -g all_group -d /etc/ -f /etc/collectl.conf
dcli -l root -g all_group service collectl start
dcli -l root -g all_group service collectl status
dcli -l root -g all_group chkconfig collectl on
dcli -l root -g all_group chkconfig --list collectl
dcli -l root -g all_group rm /var/tmp/collectl-3.7.4.src.tar.gz
dcli -l root -g all_group rm -fR /var/tmp/collectl-3.7.4
dcli -l root -g all_group rm -f /etc/collectl.conf.rpmsave
#####################################
# Upgrade collectl-utils to 3.1.0-1
######################################
# Show the currently installed collectl-utils version on every host.
dcli -l root -g all_group rpm -q collectl-utils
# Save the colplot configuration and script to /tmp/ before the package is removed.
dcli -l root -g all_group cp -p /etc/colplot.conf /usr/bin/colplot /tmp/
# Copy the new RPM from this host to /var/tmp/ on every host.
dcli -l root -g all_group -f /export/ora_stage/vishal/collectl/collectl-utils-3.1.0-1.noarch.rpm -d /var/tmp/
# Replace the package, then drop the staged RPM.
dcli -l root -g all_group rpm -e collectl-utils
dcli -l root -g all_group rpm -Uvh /var/tmp/collectl-utils-3.1.0-1.noarch.rpm
dcli -l root -g all_group rm /var/tmp/collectl-utils-3.1.0-1.noarch.rpm
# Restore the saved colplot configuration.
dcli -l root -g all_group cp -p /tmp/colplot.conf /etc/
# Both .rpmsave paths must be arguments of the same rm; on a line of its own
# the second path would be executed as a command.
dcli -l root -g all_group rm -f /etc/collectl.conf.rpmsave /etc/colplot.conf.rpmsave
# Confirm the new version.
dcli -l root -g all_group rpm -q collectl-utils
#dcli -l root -g all_group cp -p /tmp/colplot /usr/bin/colplot
#dcli -l root -g all_group cp -p /tmp/colmux /usr/bin/colmux
#####################################
# Upgrade collectl-utils to 3.2.1-1
######################################
# Show the currently installed collectl-utils version on every host.
dcli -l root -g all_group rpm -q collectl-utils
# Save the colplot configuration and the colplot/colmux scripts to /tmp/.
# All three sources and the /tmp/ destination must be on ONE line: if the
# command is split after /usr/bin/colplot, cp treats /usr/bin/colplot as the
# destination and overwrites it with colplot.conf.
dcli -l root -g all_group cp -p /etc/colplot.conf /usr/bin/colplot /usr/bin/colmux /tmp/
# Copy the new RPM from this host to /var/tmp/ on every host.
dcli -l root -g all_group -f /export/ora_stage/vishal/collectl/collectl-utils-3.2.1-1.noarch.rpm -d /var/tmp/
# Replace the package, then drop the staged RPM.
dcli -l root -g all_group rpm -e collectl-utils
dcli -l root -g all_group rpm -Uvh /var/tmp/collectl-utils-3.2.1-1.noarch.rpm
dcli -l root -g all_group rm /var/tmp/collectl-utils-3.2.1-1.noarch.rpm
# Restore the saved configuration and scripts over the freshly installed ones.
dcli -l root -g all_group cp -p /tmp/colplot.conf /etc/
dcli -l root -g all_group cp -p /tmp/colplot /usr/bin/colplot
dcli -l root -g all_group cp -p /tmp/colmux /usr/bin/colmux
# Both .rpmsave paths must be arguments of the same rm.
dcli -l root -g all_group rm -f /etc/collectl.conf.rpmsave /etc/colplot.conf.rpmsave
# Confirm the new version and restart the web server that serves colplot.
dcli -l root -g all_group rpm -q collectl-utils
dcli -l root -g all_group service httpd restart
########################################
# Downgrade collectl-utils to 3.2.1-1
########################################
# Show the currently installed collectl-utils version on every host.
dcli -l root -g all_group rpm -q collectl-utils
# Save the colplot configuration and the colplot/colmux scripts to /tmp/.
# All three sources and the /tmp/ destination must be on ONE line: if the
# command is split after /usr/bin/colplot, cp treats /usr/bin/colplot as the
# destination and overwrites it with colplot.conf.
dcli -l root -g all_group cp -p /etc/colplot.conf /usr/bin/colplot /usr/bin/colmux /tmp/
# Copy the target RPM from this host to /var/tmp/ on every host.
dcli -l root -g all_group -f /export/ora_stage/vishal/collectl/collectl-utils-3.2.1-1.noarch.rpm -d /var/tmp/
# Remove the installed package first, install the target version, then drop the staged RPM.
dcli -l root -g all_group rpm -e collectl-utils
dcli -l root -g all_group rpm -Uvh /var/tmp/collectl-utils-3.2.1-1.noarch.rpm
dcli -l root -g all_group rm /var/tmp/collectl-utils-3.2.1-1.noarch.rpm
# Restore the saved configuration and scripts over the freshly installed ones.
dcli -l root -g all_group cp -p /tmp/colplot.conf /etc/
dcli -l root -g all_group cp -p /tmp/colplot /usr/bin/colplot
dcli -l root -g all_group cp -p /tmp/colmux /usr/bin/colmux
# Both .rpmsave paths must be arguments of the same rm.
dcli -l root -g all_group rm -f /etc/collectl.conf.rpmsave /etc/colplot.conf.rpmsave
# Confirm the installed version and restart the web server that serves colplot.
dcli -l root -g all_group rpm -q collectl-utils
dcli -l root -g all_group service httpd restart
######################################
# Upgrade collectl-utils to 4.7.1-1
######################################
# Show the currently installed collectl-utils version on every host.
dcli -l root -g all_group rpm -q collectl-utils
# Save the colplot configuration and the colplot/colmux scripts to /tmp/.
# All three sources and the /tmp/ destination must be on ONE line: if the
# command is split after /usr/bin/colplot, cp treats /usr/bin/colplot as the
# destination and overwrites it with colplot.conf.
dcli -l root -g all_group cp -p /etc/colplot.conf /usr/bin/colplot /usr/bin/colmux /tmp/
# Copy the new RPM from this host to /var/tmp/ on every host.
dcli -l root -g all_group -f /export/ora_stage/vishal/collectl/collectl-utils-4.7.1-1.noarch.rpm -d /var/tmp/
# Replace the package, then drop the staged RPM.
dcli -l root -g all_group rpm -e collectl-utils
dcli -l root -g all_group rpm -Uvh /var/tmp/collectl-utils-4.7.1-1.noarch.rpm
dcli -l root -g all_group rm /var/tmp/collectl-utils-4.7.1-1.noarch.rpm
# Distribute THIS host's saved /tmp/colplot.conf and /tmp/colmux to all hosts
# (dcli -f/-d copies from the local host, unlike the remote cp used elsewhere).
dcli -l root -g all_group -f /tmp/colplot.conf -d /etc/
dcli -l root -g all_group -f /tmp/colmux -d /usr/bin/
# Both .rpmsave paths must be arguments of the same rm.
dcli -l root -g all_group rm -f /etc/collectl.conf.rpmsave /etc/colplot.conf.rpmsave
# Confirm the new version and restart the web server that serves colplot.
dcli -l root -g all_group rpm -q collectl-utils
dcli -l root -g all_group service httpd restart
##############################
# Install after cell patching (Collectl 3.7.4, collectl-utils 4.7.1-1 )
###############################
dcli -l root -g cell_group '/usr/bin/collectl -v |head -1'
# Copy the collectl 3.7.4 source tarball from this host to /var/tmp/ on every
# cell (file name must be one unbroken token).
dcli -l root -g cell_group -f /export/ora_stage/vishal/collectl/collectl-3.7.4.src.tar.gz -d /var/tmp/
# Unpack it remotely; '&&' stops the extraction if the cd fails, and tar's
# verbose listing is discarded.
dcli -l root -g cell_group "cd /var/tmp/ && gunzip -c collectl-3.7.4.src.tar.gz | tar xvf - > /dev/null"
# Run the bundled installer, then print the installed collectl version.
dcli -l root -g cell_group "cd /var/tmp/collectl-3.7.4 && sh INSTALL"
dcli -l root -g cell_group '/usr/bin/collectl -v |head -1'
# Copy the collectl-utils 4.7.1-1 RPM from this host to /var/tmp/ on every cell.
dcli -l root -g cell_group -f /export/ora_stage/vishal/collectl/collectl-utils-4.7.1-1.noarch.rpm -d /var/tmp/
dcli -l root -g cell_group rpm -Uvh /var/tmp/collectl-utils-4.7.1-1.noarch.rpm
dcli -l root -g cell_group rpm -q collectl-utils
dcli -l root -g cell_group rm /var/tmp/collectl-3.7.4.src.tar.gz
dcli -l root -g cell_group rm -fR /var/tmp/collectl-3.7.4
dcli -l root -g cell_group rm -f /etc/collectl.conf.rpmsave
dcli -l root -g cell_group rm /var/tmp/collectl-utils-4.7.1-1.noarch.rpm
dcli -l root -g cell_group -d /etc -f /etc/collectl.conf
dcli -l root -g cell_group -d /etc -f /etc/colplot.conf
dcli -l root -g cell_group -d /usr/bin/ -f /usr/bin/colmux
dcli -l root -g cell_group service collectl start
dcli -l root -g cell_group service collectl status
dcli -l root -g cell_group chkconfig collectl on
dcli -l root -g cell_group chkconfig --list collectl
##########################################
# Collectl & Collectl-utils Verification
##########################################
dcli -l root -g all_group rpm -q collectl
dcli -l root -g all_group rpm -q collectl-utils
dcli -l root -g all_group service collectl status
dcli -l root -g all_group chkconfig --list collectl
#Customization Verification
# (Needs to have -P -oz at the end of the line)
dcli -l root -g all_group 'grep ^DaemonCommands /etc/collectl.conf'
# (PlotDir needs to be /export/ora_stage/colplot)
dcli -l root -g dbs_group 'grep PlotDir /etc/colplot.conf'
# Should be 10
dcli -l root -g all_group "grep '^my \$age' /usr/bin/colmux"
# Should have ServerAliveInterval=60 in it
dcli -l root -g all_group "grep '^my \$Ssh' /usr/bin/colmux"
# Should be commented
# Shows the first line of colplot's index.cgi that matches 'findFiles.*undef'
# on each host. The pipe to head must be on the same line as egrep: a remote
# command line that starts with '|' is a shell syntax error.
dcli -l root -g all_group "egrep 'findFiles.*undef' /var/www/html/colplot/index.cgi | head -1"
- /var/www/html/colplot/index.cgi
- Change colplot home page cgi script to stop it from trying to find earlier and
latest file.
#Changed by Vishal
#print "<input type=radio name=timeframe value=fixed checked>\n";
print "<input type=radio name=timeframe value=fixed>\n";
print "<font=5><i>OR</i>\n";
print "<b>Last: <input type=text name=winsize size=1 value=$winsize>
Minutes\n";
#Changed by Vishal
#print "<input type=radio name=timeframe value=float>\n";
print "<input type=radio name=timeframe value=float checked>\n";
###################################
# Troubleshooting
###################################
###################################
# Infiniband Troubleshooting
###################################
# Infinicheck
###################
/opt/oracle.SupportTools/ibdiagtools/infinicheck -g ~/dbs_ib_group
# infiniband commands
######################
ibswitches
ibhosts
ibnodes
ibstatus
iblinkinfo
##
# Report whether the InfiniBand subnet manager is running on an IB switch.
# SUBNET_MGR_GID : characters 3-16 of the 7th space-separated field of
#                  `sminfo` output (presumably the master SM's GUID without
#                  its 0x prefix - confirm against local sminfo output).
# SUBNET_MGR_LOC : "IB_SWITCH" if that value equals characters 14-27 of any
#                  `ibswitches` output line, otherwise "OTHER".
# Both variables are exported; SUBNET_MGR_LOC is also printed.
SUBNET_MGR_GID=$(sminfo | cut -d" " -f7 | cut -c3-16)
export SUBNET_MGR_GID
export SUBNET_MGR_LOC="OTHER"
for IB_NODE_GID in $(ibswitches | cut -c14-27); do
  # Operands are quoted so an empty sminfo result compares as "not equal"
  # instead of making the test fail with "unary operator expected".
  if [ "$SUBNET_MGR_GID" = "$IB_NODE_GID" ]; then
    SUBNET_MGR_LOC="IB_SWITCH"
  fi
done
echo "$SUBNET_MGR_LOC"
###################################
# OS Watcher Commands
###################################
#############################################################
#-- Get I/O statistics for a hard disk from OS Watcher
#############################################################
File Format
###############
zzz ***Tue Feb 11 09:01:54 PST 2014 Sample interval: 5 secconds
Linux 2.6.32-400.11.1.el5uek (servername) 02/11/14
Time: 09:01:54
avg-cpu: %user %nice %system %iowait %steal %idle
1.56 0.00 1.46 1.43 0.00 95.55
# Work from the OS Watcher oswiostat archive directory.
cd /opt/oracle.oswatcher/osw/archive/oswiostat
###################################################################################
#-- Get I/O statistics for a hard disk from OS Watcher (TABULAR for each disk device)
###################################################################################
#############################################################
#-- Filter the OS Watcher Top output for a particular process
#############################################################
#######################################
# Keep the top lines matching a process pattern and prefix each with the
# date and time of the sample it belongs to.
# Input:     decompressed oswtop text on stdin
# Arguments: $1 - extended regex selecting the process lines to keep
# Outputs:   "<DD-Mon-YYYY> <HH:MM:SS> <original line>" per matching line
#######################################
osw_top_process_filter() {
  grep -E -h "^zzz|^top|$1" | awk '
    # zzz marker line: remember the sample date (day-month-year).
    $1 == "zzz" { date = $4 "-" $3 "-" $7; next }
    # top summary line: remember the sample time.
    $1 == "top" { time = $3; next }
    { print date " " time " " $0 }
  '
}

# Scan the archives for the chosen hour. The loop only runs when the cd
# succeeds, and the glob replaces parsing ls output so file names are never
# word-split.
cd /opt/oracle.oswatcher/osw/archive/oswtop/ &&
for top_file in *14.04.24.0500.dat*; do
  [ -e "$top_file" ] || continue # glob matched nothing
  bunzip2 -c -- "$top_file" | osw_top_process_filter 'tnslsnr LISTENER -inherit'
done
#######################################################################
#-- Filter the OS Watcher Top output for high CPU usage processes
#######################################################################
PID USER PR NI VIRT RES SHR S %CPU %MEM TIME+ COMMAND
1 - PID
2 - USER
3 - PRIORITY
4 - Nice
5 - Virtual Memory
6 - Resident Memory
7 - Shared Memory
8 - Status
9 - %CPU
10 - %Memory
11 - Time
12 - Command
#######################################
# Print the top process rows whose %CPU (column 9) is at or above a
# threshold, each prefixed with the date and time of its sample.
# Input:     decompressed oswtop text on stdin
# Arguments: $1 - %CPU threshold (optional, default 80)
# Outputs:   "<DD-Mon-YYYY> <HH:MM:SS> <original row>" per matching row
#######################################
osw_top_high_cpu() {
  grep -E -v '^Tasks|^Mem|^Swap' | awk -v threshold="${1:-80}" '
    # zzz marker line: remember the sample date (day-month-year).
    $1 == "zzz" { date = $4 "-" $3 "-" $7; next }
    # top summary line: remember the sample time.
    $1 == "top" { time = $3; next }
    # Only rows that start with a numeric PID are processes. Adding 0 forces
    # a numeric comparison, so header and Cpu(s) rows can never match through
    # a string comparison.
    $1 ~ /^[0-9]+$/ && $9 + 0 >= threshold + 0 { print date " " time " " $0 }
  '
}

# Scan the archives for the chosen hour. The loop only runs when the cd
# succeeds, and the glob replaces parsing ls output so file names are never
# word-split.
cd /opt/oracle.oswatcher/osw/archive/oswtop/ &&
for top_file in *14.04.29.2000*.dat.bz2; do
  [ -e "$top_file" ] || continue # glob matched nothing
  bunzip2 -c -- "$top_file" | osw_top_high_cpu 80
done
#######################################################################
#-- Filter the OS Watcher Top output for high CPU usage processes
#######################################################################
1 - PID
2 - USER
3 - PRIORITY
4 - Nice
5 - Virtual Memory
6 - Resident Memory
7 - Shared Memory
8 - Status
9 - %CPU
10 - %Memory
11 - Time
12 - Command
#######################################
# Print the top process rows whose %CPU (column 9) is at or above a
# threshold, each prefixed with the date and time of its sample.
# Input:     decompressed oswtop text on stdin
# Arguments: $1 - %CPU threshold (optional, default 80)
# Outputs:   "<DD-Mon-YYYY> <HH:MM:SS> <original row>" per matching row
#######################################
osw_top_high_cpu() {
  grep -E -v '^Tasks|^Mem|^Swap' | awk -v threshold="${1:-80}" '
    # zzz marker line: remember the sample date (day-month-year).
    $1 == "zzz" { date = $4 "-" $3 "-" $7; next }
    # top summary line: remember the sample time.
    $1 == "top" { time = $3; next }
    # Only rows that start with a numeric PID are processes. Adding 0 forces
    # a numeric comparison, so header and Cpu(s) rows can never match through
    # a string comparison.
    $1 ~ /^[0-9]+$/ && $9 + 0 >= threshold + 0 { print date " " time " " $0 }
  '
}

# Scan the archives for the chosen hour. The loop only runs when the cd
# succeeds, and the glob replaces parsing ls output so file names are never
# word-split.
cd /opt/oracle.oswatcher/osw/archive/oswtop/ &&
for top_file in *14.04.29.2000*.dat.bz2; do
  [ -e "$top_file" ] || continue # glob matched nothing
  bunzip2 -c -- "$top_file" | osw_top_high_cpu 80
done
#######################################################################
#-- Expect script
#######################################################################
Reference - http://progeeking.com/2013/10/28/exadata-io-statistics/
#####################################################################
# I/O latency statistics (Note the IO L stats)
# First statement dumps the 'iolstats' statistics, second resets them.
alter cell events="immediate cellsrv.cellsrv_dump('iolstats',0)";
alter cell events="immediate cellsrv.cellsrv_resetstats('iolstats')";
#
# Set _cell_gen_time_stats_level to 1, then back to 0.
# Each statement is kept on a single line: the quoted event string must not
# be split by a line break.
alter cell events = "immediate cellsrv.cellsrv_setparam('_cell_gen_time_stats_level','1')";
alter cell events = "immediate cellsrv.cellsrv_setparam('_cell_gen_time_stats_level','0')";
# To enable tracing of the auto disk management modules on a storage cell, run cellcli and enter the following:
alter cell events='trace[cellsrv.cellsrv_events_layer] memory=highest,disk=highest'
# To disable tracing of the auto disk management modules on a storage cell, run cellcli and enter the following:
alter cell events='trace[cellsrv.cellsrv_events_layer] off';
Possible parameters
_cell_1mb_buffers_hugepage_support=false
_cell_disable_ant_check_reid=true
_cell_io_hang_reboot=false
_cell_io_hang_time=60
_cell_num_16k_buffers=2000
_cell_num_1mb_brr_buffers=5
_cell_num_1mb_buffers=200
_cell_num_1mb_bwr_buffers=5
_cell_num_32k_buffers=1000
_cell_num_64k_buffers=1000
_cell_num_8k_buffers=5000
_cell_num_buffers=1200
_cell_print_all_params=true
_cellrsbkp_poll_invl=15
_cellrsdef_fast_restart=0
_cellrsdef_heartbeat_timeout=6
_cellrsdef_srvc_cleanup_time=5
_cellrsdef_srvc_dump=30
_cellrsms_poll_invl=60
_cellrsos_poll_invl=15
_cellrssrv_poll_invl=15
_ms_cell_ioctl_timeout=60000
_reconnect_to_cell_attempts=4
_reonnect_to_cell_freq_in_sec=4
_skgxp_gen_ant_off_rpc_timeout_in_sec=300
_skgxp_gen_rpc_timeout_in_sec=90
_skgxp_udp_use_tcb=false
_skgxp_udp_use_tcb=false
###################################
ZFS Tuning
##############
###################################
Exadata Network Interface Check
###################################
###############################
Exadata Unix Account Creation
###############################
#################
Exadata Tuning
#################
#------------------------------------------
# Increase vm.max_map_count to 200000
#------------------------------------------
# Check
# Both commands print the current vm.max_map_count on the dbs_group hosts:
# once read from /proc and once through sysctl.
dcli -l root -g dbs_group cat /proc/sys/vm/max_map_count
dcli -l root -g dbs_group /sbin/sysctl vm.max_map_count
# Change
# The first command writes 200000 to the /proc entry; the second appends the
# setting to /etc/sysctl.conf.
# NOTE(review): the append is not idempotent - every re-run adds another
# vm.max_map_count line to /etc/sysctl.conf; check for an existing entry first.
dcli -l root -g dbs_group 'echo 200000 > /proc/sys/vm/max_map_count'
dcli -l root -g dbs_group 'echo vm.max_map_count=200000 >> /etc/sysctl.conf'
##########################
Exadata Disk Scrubbing
##########################
# Set disk scrubbing interval (Valid options are daily, weekly, biweekly and none).
# Time is in ms.
# 14 days = 1209600000 ms
# 7 days = 604800000 ms
# 1 day = 86400000 ms
# The cellcli command is kept on one line inside the quotes: a newline there
# would make the remote shell run "hardDiskScrubInterval=biweekly" as a
# separate command instead of passing it to ALTER CELL.
dcli -l root -g ~/cell_group "cellcli -e ALTER CELL hardDiskScrubInterval=biweekly"
-------------------------------------------------------------------
-- ASM Disk Errors
-------------------------------------------------------------------
-- One row per failgroup with the summed read_errs+write_errs of its disks,
-- split into one column per label CD_00 .. CD_11.
-- substr(name,INSTR(name,'_',-1,3)+1,5) takes the 5 characters that follow
-- the third underscore counted from the end of the disk name; each DECODE
-- adds a disk's errors only to the column whose label equals that value.
-- NOTE(review): assumes disk names end in <label>_<suffix> with the CD_nn
-- label in that position - confirm against the actual disk names.
SELECT failgroup
, sum(DECODE(substr(name,INSTR(name,'_',-1,3)+1,5),
'CD_00',read_errs+write_errs,0)) CD_00
, sum(DECODE(substr(name,INSTR(name,'_',-1,3)+1,5),
'CD_01',read_errs+write_errs,0)) CD_01
, sum(DECODE(substr(name,INSTR(name,'_',-1,3)+1,5),
'CD_02',read_errs+write_errs,0)) CD_02
, sum(DECODE(substr(name,INSTR(name,'_',-1,3)+1,5),
'CD_03',read_errs+write_errs,0)) CD_03
, sum(DECODE(substr(name,INSTR(name,'_',-1,3)+1,5),
'CD_04',read_errs+write_errs,0)) CD_04
, sum(DECODE(substr(name,INSTR(name,'_',-1,3)+1,5),
'CD_05',read_errs+write_errs,0)) CD_05
, sum(DECODE(substr(name,INSTR(name,'_',-1,3)+1,5),
'CD_06',read_errs+write_errs,0)) CD_06
, sum(DECODE(substr(name,INSTR(name,'_',-1,3)+1,5),
'CD_07',read_errs+write_errs,0)) CD_07
, sum(DECODE(substr(name,INSTR(name,'_',-1,3)+1,5),
'CD_08',read_errs+write_errs,0)) CD_08
, sum(DECODE(substr(name,INSTR(name,'_',-1,3)+1,5),
'CD_09',read_errs+write_errs,0)) CD_09
, sum(DECODE(substr(name,INSTR(name,'_',-1,3)+1,5),
'CD_10',read_errs+write_errs,0)) CD_10
, sum(DECODE(substr(name,INSTR(name,'_',-1,3)+1,5),
'CD_11',read_errs+write_errs,0)) CD_11
FROM gv$asm_disk_stat
-- Only disks whose name contains CD are counted.
where name like '%CD%'
GROUP BY failgroup
ORDER BY 1;
##########################
# Exadata Rsync backup
##########################
# Mirror the local system directories into a per-host backup directory.
# Each directory is synced separately with --one-file-system, so file systems
# mounted below it are not followed; --delete removes files from the backup
# that no longer exist in the source.
BACKUP_DIR="/u23/oraback/exadata/$(hostname -s)/"
for backup_src in /app /bin /boot /etc /lib /lib64 /mnt /opt /root /sbin \
  /selinux /srv /usr /var; do
  rsync -a -v --one-file-system --delete "${backup_src}" "${BACKUP_DIR}"
done
# /u01 is synced without the patches directory and without audit/trace files.
# The excludes must be part of the same command line (note the trailing
# backslash); on a line of their own they are run as a separate command and
# /u01 is copied with no excludes at all.
rsync -a -v --one-file-system --delete /u01 "${BACKUP_DIR}" \
  --exclude='/u01/patches/' --exclude='*.aud' --exclude='*.trc' --exclude='*.trm'
# Mirror only /u01 into the per-host backup directory, leaving out the
# patches directory and audit/trace files.
# The excludes must be part of the same command line (note the trailing
# backslash); on a line of their own they are run as a separate command and
# /u01 is copied with no excludes at all.
BACKUP_DIR="/u23/oraback/exadata/$(hostname -s)/"
rsync -a -v --one-file-system --delete /u01 "${BACKUP_DIR}" \
  --exclude='/u01/patches/' --exclude='*.aud' --exclude='*.trc' --exclude='*.trm'