检测24小时集群日志完整性
#!/bin/sh
# Check that all 24 hourly HDFS log paths for the previous day exist.
# Usage: script.sh <part1> <part2> <part3>
#   "$1/$2/$3" is joined into a printf format string that is expanded
#   with the previous day's "YYYY MM DD" and a two-digit hour.
# For each missing hourly path the script polls every 5s until the path
# appears, sending a WeChat alert the first time it is found missing.
source /etc/profile
export LANG=zh_CN.UTF-8

# info MESSAGE... - print a timestamped, tagged log line to stdout.
info(){
        echo "`date +"%Y-%m-%d %H:%M:%S"` [sessionlog-run-condition] $*"
}

cmd=$0
format="$1/$2/$3"
# Previous day rendered as "YYYY MM DD": three separate printf arguments.
dt=`date -d "-1 day" +%Y" "%m" "%d`
info "checking $format $dt"

# Tracks whether ANY alert was sent during the whole run; the original
# per-hour sended_sms flag was reset every iteration, so the final
# recovery notification only reflected hour 23.
any_sms=0
for hour in `seq 0 23`;do
        hour=`printf %02d $hour`
        target=`printf "$format" $dt $hour`
        hadoop fs -test -e "$target"
        exist=$?
        sended_sms=0
        while [ $exist -eq 1 ];do
                msg="WARNING: sessionlog dependency $target not exists, sessionlog would stuck here. Check $cmd for detailes"
                # Alert only once per missing hour, then keep polling.
                if [ $sended_sms -eq 0 ] ;then
                        sh /home/flume/new_log_monitor/weixin.sh "$msg"
                        info "$msg"
                        sended_sms=1
                        any_sms=1
                fi
                sleep 5s
                hadoop fs -test -e "$target"
                exist=$?
        done
        info "$target checked"
done

msg="sessionlog-dependent $1 check passed"
info "$msg"
# Send a recovery notice only if at least one alert was raised above.
# (The original was missing the closing 'fi' here -- a syntax error.)
if [ $any_sms -eq 1 ];then
  sh /home/flume/new_log_monitor/weixin.sh "$msg"
fi

检测每小时集群日志完整性
#!/bin/bash
# Wait until an hourly HDFS log directory exists and contains the
# expected number of partition files; alert by SMS on delay/recovery.
# Usage: script.sh <topic> <base-path> <partition-count> <yyyy/MM/dd/HH>
base_dir=/home/flume/new_log_monitor
source /etc/profile

topic=$1
path=$2
partition_num=$3
#pathdt=`date +%Y/%m/%d/%H`
pathdt=$4
hour_path="$path/$pathdt"

# Phase 1: poll every 3 minutes until the hourly directory exists.
# SMS after 4 consecutive misses, then repeated every 5 misses.
hadoop fs -test -e "$hour_path"
flag=$?
_create_count=1
while [ $flag -eq 1 ]
do
  hadoop fs -test -e "$hour_path"
  flag=$?
  echo "${hour_path}路径不存在"
  if [ $flag -eq 1 ];then
   let "_create_count++"
   if [ $_create_count -eq 5 ] ;then
       echo "sms ---$hour_path目录不存在"
       sh $base_dir/sms.sh ${hour_path}日志未生成
       _create_count=0
   fi
   sleep 3m
  fi
done
echo "$hour_path"

# Phase 2: poll until the expected number of partitions is present.
# file_count MUST be initialised: the original left it unset, so the
# first [ $file_count -lt ... ] test was malformed ([ -lt N ]) and the
# loop body never ran at all.
file_count=0
sended_sms=0
while [ "$file_count" -lt "$partition_num" ]
do
    # Partition files are assumed to be named "<partition>-....lzo";
    # count the distinct partition prefixes. NOTE(review): the original
    # pipeline printed one prefix per file and never reduced it to a
    # single number -- `sort -u | wc -l` is the presumed intent, confirm
    # against the actual file naming scheme.
    file_count=`hadoop fs -ls $hour_path/*.lzo|awk -F / '{print $NF}'| awk -F - '{print $1}'|sort -u|wc -l`
    echo "$topic时间$hour_path文件分区数$file_count"

   if [ "$file_count" -lt "$partition_num" ] ;then
     echo "$hour_path文件数为$file_count,不为$partition_num"
     # Alert only once per incomplete hour, then keep polling.
     if [ $sended_sms -eq 0 ] ;then
       sh $base_dir/sms.sh ${hour_path}日志分区不完整
       sended_sms=1
     fi
     sleep 3m
   else
      # Recovery notice, only if an alert was sent earlier.
      if [ $sended_sms -eq 1 ];then
       sh $base_dir/sms.sh ${hour_path}日志所有分区已生成
      fi
   fi
done
exit 0

检测数据库记录
#!/bin/sh
# Hourly driver: verify the database is reachable, sync nginx data from
# mysql/redis into oracle, then launch one log-monitor MR job per topic
# listed in the monitor config (throttled to ~30 concurrent jobs).
source /etc/profile

time=`date -d "-1 hour" +%Y/%m/%d/%H`   # hour under check, path form
dp=`date -d "-1 hour" +%Y%m%d%H`        # same hour, compact form for log file names
check_hour=`date +%Y/%m/%d/%H`          # current wall-clock hour
date
base_dir=/home/flume/new_log_monitor

# Check that the oracle database is reachable.
sh /home/flume/new_log_monitor/check_oracle.sh
# Sync nginx data from mysql to oracle. The original defined job_jar and
# then hardcoded the same path in the java command; use the variable.
job_jar=$base_dir/lib/log-monitor-job-0.0.1-SNAPSHOT-jar-with-dependencies.jar
java -cp "$job_jar" -Dconfig=$base_dir/config/redis2oracle.properties com.sohu.redis2oracle.SyncRedis2Oracle $time
# Config file: one "topic inputpath partitions" triple per line.
config_file=$base_dir/config/monitor.config
cat $config_file| while read -r line;do
 # Throttle submissions; `grep -v grep` excludes the grep process itself,
 # which the original always counted as one running job.
 while [ `ps -ef | grep "log-monitor-mr-job.sh" | grep -v grep | wc -l` -gt 30 ];do
  sleep 3s
  echo "`date +"%Y-%m-%d %H:%M:%S"` too many tasks submitted,waiting..."
 done
 topic=`echo $line |awk '{ print $1;}'`
 inputpath=`echo $line |awk '{ print $2;}'`
 partitonsNum=`echo $line |awk '{ print $3;}'`
 # Count the topic's records for this hour, compare against the
 # oracle-side nginx numbers, and store the result flag in the database.
 nohup sh $base_dir/sh/log-monitor-mr-job.sh $time $topic $inputpath $partitonsNum $check_hour>>$base_dir/logs/${dp}_${topic}.log  2>&1 &
done
date
# Inputs for a later udata step (continues past this view); kept as-is.
udata_date=`date -d "-2 hour" +%Y/%m/%d`
hour=`date -d "-2 hour" +%H`
# (note timestamp: 08-31 21:42)