#!/bin/bash
# Recommended plugin for monitoring Elasticsearch: a powerful shell script
################################################################################
# Script: check_es_system.sh #
# Author: Claudio Kuenzler www.claudiokuenzler.com #
# Purpose: Monitor ElasticSearch Store (Disk) Usage #
# Licence: GPLv2 #
# Licence : GNU General Public Licence (GPL) http://www.gnu.org/ #
# This program is free software; you can redistribute it and/or #
# modify it under the terms of the GNU General Public License #
# as published by the Free Software Foundation; either version #
# of the License, or (at your option) any later version. #
# #
# This program is distributed in the hope that it will be useful, #
# but WITHOUT ANY WARRANTY; without even the implied warranty of #
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
# #
# GNU General Public License for more details. #
# #
# You should have received a copy of the GNU General Public License #
# along with this program; if not, see <https://www.gnu.org/licenses/>. #
# #
# Copyright ,, Claudio Kuenzler #
# Copyright Tomas Barton #
# #
# History: #
# : Started programming plugin #
# : Continued programming. Working now as it should =) #
# : Added memory usage check, check types option (-t) #
# : Renamed plugin from check_es_store to check_es_system #
# : Change internal referenced variable name for available size #
# : Output now contains both used and available sizes #
# : Add missing -t in usage output #
# : Fix if statement for authentication (@deric) #
# : Fix authentication when wrong credentials were used #
# : Configure max_time for Elastic to respond (@deric) #
# : Fix alternative subject name in ssl (issue ), direct to auth #
# : Added status check type #
# : Check for mandatory parameter checktype, adjust help #
# : Catch connection refused error #
################################################################################
#Variables and defaults
# Exit codes follow the Nagios/Monitoring-Plugins convention.
STATE_OK=0       # define the exit code if status is OK
STATE_WARNING=1  # define the exit code if status is Warning
STATE_CRITICAL=2 # define the exit code if status is Critical
STATE_UNKNOWN=3  # define the exit code if status is Unknown
export PATH=$PATH:/usr/local/bin:/usr/bin:/bin # Set path
version=1.3
port=9200        # default Elasticsearch HTTP port (override with -P)
httpscheme=http  # switched to https by -S
unit=G           # unit for the -d value: K, M or G (override with -o)
warning=80       # warning threshold in percent (override with -w)
critical=95      # critical threshold in percent (override with -c)
max_time=30      # seconds curl waits for a response (override with -m)
################################################################################
#Functions
#######################################
# Print the usage text and exit with the UNKNOWN state.
# Globals:   version, STATE_UNKNOWN (read)
# Outputs:   usage text on stdout
#######################################
help () {
  cat <<EOF
$0 $version (c) 2016-$(date +%Y) Claudio Kuenzler and contributers (published under GPL licence)

Usage: ./check_es_system.sh -H ESNode [-P port] [-S] [-u user] [-p pass] -t checktype [-d int] [-o unit] [-w int] [-c int] [-m int]

Options:

   *  -H Hostname or ip address of ElasticSearch Node
      -P Port (defaults to 9200)
      -S Use https
      -u Username if authentication is required
      -p Password if authentication is required
   *  -t Type of check (disk|mem|status)
   +  -d Available size of disk or memory (ex. 20)
      -o Disk space unit (K|M|G) (defaults to G)
      -w Warning threshold in percent (default: 80)
      -c Critical threshold in percent (default: 95)
      -m Maximum time in seconds to wait for response (default: 30)
      -h Help!

*mandatory options
+mandatory options for types disk,mem

Requirements: curl, jshon, expr
EOF
  exit "${STATE_UNKNOWN}"
}

#######################################
# Verify that both username and password were supplied; exit UNKNOWN if
# either one is missing. Returns normally when both are set.
# Globals:   user, pass, STATE_UNKNOWN (read)
#######################################
authlogic () {
  if [[ -z "${user}" && -z "${pass}" ]]; then
    echo "ES SYSTEM UNKNOWN - Authentication required but missing username and password"
    exit "${STATE_UNKNOWN}"
  elif [[ -n "${user}" && -z "${pass}" ]]; then
    echo "ES SYSTEM UNKNOWN - Authentication required but missing password"
    exit "${STATE_UNKNOWN}"
  elif [[ -n "${pass}" && -z "${user}" ]]; then
    echo "ES SYSTEM UNKNOWN - Missing username"
    exit "${STATE_UNKNOWN}"
  fi
}

#######################################
# Convert the user-given capacity into bytes and derive thresholds.
# ES presents the currently used space in bytes, so the -d value (given in
# the selected unit) is scaled up to bytes for comparison, while the used
# size is scaled down to the unit for display.
# Globals read:    unit, available, size, warning, critical, STATE_UNKNOWN
# Globals written: availsize, outputsize, warningsize, criticalsize,
#                  usedpercent
#######################################
unitcalc() {
  if [[ -z "${unit}" ]]; then
    echo "UNKNOWN - Shouldnt exit here. No units given"
    exit "${STATE_UNKNOWN}"
  fi

  local divisor
  case "${unit}" in
    K) divisor=1024 ;;
    M) divisor=1048576 ;;
    G) divisor=1073741824 ;;
    *)
      # Without this arm an unsupported unit left availsize unset and the
      # calculations below failed with expr syntax errors.
      echo "UNKNOWN - Invalid unit '${unit}' given (use K, M or G)"
      exit "${STATE_UNKNOWN}"
      ;;
  esac

  # expr (rather than shell arithmetic) is kept on purpose: "size" comes from
  # the server response and expr never evaluates its operands as code.
  availsize=$(expr "${available}" \* "${divisor}")
  outputsize=$(expr "${size}" / "${divisor}")

  if [[ -n "${warning}" ]]; then
    warningsize=$(expr "${warning}" \* "${availsize}" / 100)
  fi
  if [[ -n "${critical}" ]]; then
    criticalsize=$(expr "${critical}" \* "${availsize}" / 100)
  fi
  usedpercent=$(expr "${size}" \* 100 / "${availsize}")
}

#######################################
# Exit UNKNOWN when the capacity parameter (-d) was not given.
# Globals:   available, STATE_UNKNOWN (read)
#######################################
availrequired() {
  if [[ -z "${available}" ]]; then
    echo "UNKNOWN - Missing parameter '-d'"
    exit "${STATE_UNKNOWN}"
  fi
}
################################################################################
# Check requirements
# Abort with UNKNOWN when one of the external tools the plugin relies on is
# not available. "command -v" is a shell builtin, so the check itself needs
# no external "which" binary and nothing is executed by accident.
for cmd in curl jshon expr; do
  if ! command -v "${cmd}" >/dev/null 2>&1; then
    echo "UNKNOWN: ${cmd} does not exist, please check if command exists and PATH is correct"
    exit "${STATE_UNKNOWN}"
  fi
done
################################################################################
# Check for people who need help - aren't we all nice ;-)
# Show the usage text when called with --help or without any argument.
# "$#" is compared numerically against 0; an empty-string comparison can
# never match because "$#" always expands to a number.
if [[ "${1:-}" == "--help" || "$#" -eq 0 ]]; then help; exit "${STATE_UNKNOWN}"; fi
################################################################################
# Get user-given variables
# Options taking a value are followed by ":" in the optstring. "h" is listed
# explicitly so that -h (advertised in the usage text) shows the help without
# getopts first printing an "illegal option" error.
while getopts "H:P:Su:p:d:o:w:c:t:m:h" Input; do
  case "${Input}" in
    H) host=${OPTARG} ;;
    P) port=${OPTARG} ;;
    S) httpscheme=https ;;
    u) user=${OPTARG} ;;
    p) pass=${OPTARG} ;;
    d) available=${OPTARG} ;;
    o) unit=${OPTARG} ;;
    w) warning=${OPTARG} ;;
    c) critical=${OPTARG} ;;
    t) checktype=${OPTARG} ;;
    m) max_time=${OPTARG} ;;
    h) help ;;
    *) help ;;
  esac
done

# Check for mandatory opts
if [[ -z "${host}" ]]; then help; exit "${STATE_UNKNOWN}"; fi
if [[ -z "${checktype}" ]]; then help; exit "${STATE_UNKNOWN}"; fi
################################################################################
# Retrieve information from Elasticsearch
esurl="${httpscheme}://${host}:${port}/_cluster/stats"
eshealthurl="${httpscheme}://${host}:${port}/_cluster/health"

# Options shared by every request: -k skips certificate verification, -s
# silences progress output, --max-time bounds the whole transfer.
curlopts=(-k -s --max-time "${max_time}")

# Exit CRITICAL when curl could not reach the node.
#   $1 - exit status of the curl call (7 = failed to connect, 28 = timeout)
# The status is passed in as an argument because "$?" is overwritten by every
# test; re-reading it in an elif would inspect the test, not curl.
check_curl_rc() {
  case "$1" in
    7)
      echo "ES SYSTEM CRITICAL - Failed to connect to ${host} port ${port}: Connection refused"
      exit "${STATE_CRITICAL}"
      ;;
    28)
      echo "ES SYSTEM CRITICAL - server did not respond within ${max_time} seconds"
      exit "${STATE_CRITICAL}"
      ;;
  esac
}

if [[ -z "${user}" ]]; then
  # Without authentication
  esstatus=$(curl "${curlopts[@]}" "${esurl}")
  check_curl_rc "$?"
  # Additionally get cluster health infos
  if [[ "${checktype}" == "status" ]]; then
    eshealth=$(curl "${curlopts[@]}" "${eshealthurl}")
  fi
fi

# Use credentials when a user was given, or when the anonymous request above
# was answered with an authentication error.
if [[ -n "${user}" ]] || grep -qi "authentication" <<<"${esstatus}"; then
  # Authentication required
  authlogic
  esstatus=$(curl "${curlopts[@]}" --basic -u "${user}:${pass}" "${esurl}")
  check_curl_rc "$?"
  if grep -qi "unable to authenticate" <<<"${esstatus}"; then
    echo "ES SYSTEM CRITICAL - Unable to authenticate user $user for REST request"
    exit "${STATE_CRITICAL}"
  fi
  # Additionally get cluster health infos
  if [[ "${checktype}" == "status" ]]; then
    eshealth=$(curl "${curlopts[@]}" --basic -u "${user}:${pass}" "${eshealthurl}")
  fi
fi

# Catch empty reply from server (typically happens when ssl port used with http connection)
if [[ -z "${esstatus}" ]]; then
  echo "ES SYSTEM UNKNOWN - Empty reply from server (verify ssl settings)"
  exit "${STATE_UNKNOWN}"
fi

# Do the checks
# Exit UNKNOWN unless the global "size" is a plain non-negative integer.
#   $1 - resource name used in the message ("disk" or "memory")
# Without this guard a response that could not be parsed made the numeric
# comparisons fail and the check fell through to the OK branch.
require_numeric_size() {
  if ! [[ "${size}" =~ ^[0-9]+$ ]]; then
    echo "ES SYSTEM UNKNOWN - Unable to read $1 usage from Elasticsearch response"
    exit "${STATE_UNKNOWN}"
  fi
}

# Print the status line plus perfdata for a usage check and exit with the
# matching state. Shared by the disk and mem checks so both emit the same
# message layout.
#   $1 - resource name shown in the message ("Disk" or "Memory")
#   $2 - perfdata label ("es_disk" or "es_memory")
# Globals read: size, usedpercent, outputsize, unit, available, warning,
#               critical, warningsize, criticalsize, STATE_*
report_usage() {
  local label=$1 perflabel=$2
  local message="${label} usage is at ${usedpercent}% ($outputsize $unit from $available $unit)"

  # No thresholds
  if [[ -z "${warning}" && -z "${critical}" ]]; then
    echo "ES SYSTEM OK - ${message}|${perflabel}=${size}B;;;;"
    exit "${STATE_OK}"
  fi

  # Handle thresholds; a threshold that was not set is simply skipped.
  local perfdata="${perflabel}=${size}B;${warningsize};${criticalsize};;"
  if [[ -n "${criticalsize}" ]] && [ "${size}" -ge "${criticalsize}" ]; then
    echo "ES SYSTEM CRITICAL - ${message}|${perfdata}"
    exit "${STATE_CRITICAL}"
  elif [[ -n "${warningsize}" ]] && [ "${size}" -ge "${warningsize}" ]; then
    echo "ES SYSTEM WARNING - ${message}|${perfdata}"
    exit "${STATE_WARNING}"
  else
    echo "ES SYSTEM OK - ${message}|${perfdata}"
    exit "${STATE_OK}"
  fi
}

case "${checktype}" in
  disk) # Check disk usage
    availrequired
    size=$(printf '%s' "${esstatus}" | jshon -e indices -e store -e "size_in_bytes")
    require_numeric_size "disk"
    unitcalc
    report_usage "Disk" "es_disk"
    ;;

  mem) # Check memory usage
    availrequired
    size=$(printf '%s' "${esstatus}" | jshon -e nodes -e jvm -e mem -e "heap_used_in_bytes")
    require_numeric_size "memory"
    unitcalc
    report_usage "Memory" "es_memory"
    ;;

  status) # Check Elasticsearch status
    # The response is passed quoted so the shell cannot glob-expand or
    # re-split the JSON before jshon sees it.
    status=$(printf '%s' "${esstatus}" | jshon -e status -u)
    shards=$(printf '%s' "${esstatus}" | jshon -e indices -e shards -e total -u)
    docs=$(printf '%s' "${esstatus}" | jshon -e indices -e docs -e count -u)
    nodest=$(printf '%s' "${esstatus}" | jshon -e nodes -e count -e total -u)
    nodesd=$(printf '%s' "${esstatus}" | jshon -e nodes -e count -e data -u)
    relocating=$(printf '%s' "${eshealth}" | jshon -e relocating_shards -u)
    init=$(printf '%s' "${eshealth}" | jshon -e initializing_shards -u)
    unass=$(printf '%s' "${eshealth}" | jshon -e unassigned_shards -u)

    perfdata="total_nodes=${nodest};;;; data_nodes=${nodesd};;;; total_shards=${shards};;;; relocating_shards=${relocating};;;; initializing_shards=${init};;;; unassigned_shards=${unass};;;; docs=${docs};;;;"
    # Yellow and red additionally report the shard movement counters.
    details="${nodest} nodes, ${nodesd} data nodes, ${shards} shards, ${relocating} relocating shards, ${init} initializing shards, ${unass} unassigned shards, ${docs} docs"

    case "${status}" in
      green)
        echo "ES SYSTEM OK - Elasticsearch Cluster is green (${nodest} nodes, ${nodesd} data nodes, ${shards} shards, ${docs} docs)|${perfdata}"
        exit "${STATE_OK}"
        ;;
      yellow)
        echo "ES SYSTEM WARNING - Elasticsearch Cluster is yellow (${details})|${perfdata}"
        exit "${STATE_WARNING}"
        ;;
      red)
        echo "ES SYSTEM CRITICAL - Elasticsearch Cluster is red (${details})|${perfdata}"
        exit "${STATE_CRITICAL}"
        ;;
      *)
        # Previously an unrecognised or missing status ended the script
        # without any output or defined exit code.
        echo "ES SYSTEM UNKNOWN - Unable to determine Elasticsearch Cluster status"
        exit "${STATE_UNKNOWN}"
        ;;
    esac
    ;;

  *) help ;;
esac
# Reposted from:
# https://www.claudiokuenzler.com/monitoring-plugins/check_es_system.php