squid內存監控腳本

oom_of_squid
用於監控服務器上的squid進程,保證對用戶服務的穩定。
此進程意欲以守護進程模式運行,可以這樣使用:./oom_of_squid &
主要功能:
監控配置過的每個squid進程,保證它的內存佔用看似正常;
確保進程PID和PID文件一致;
squid進程掛掉後啓動之;
squid進程佔用的內存超過設置的閾值則重啓之;
系統可用內存低於閾值,則選擇一個squid進程重啓之;
默認每3秒檢查一次;

注意事項:
`echo squid7[0-9].conf`在gentoo和CentOS上的行爲不一樣,到CentOS下應用可能需要改爲`ls squid7[0-9].conf`;
/proc/$PID/comm 在較新的內核上纔有這個接口,使用舊內核需要另想辦法;
/etc/init.d/squid_multi_instance是我重寫過的進程管理腳本(還有bug),使用“標準”腳本的人需要自己改一下;
系統可用內存閾值應該適當高於系統自身的oom閾值;
用於全內存式緩存更好,因爲不用顧忌狀態文件的同步。
我設置的配置文件中,squid8[0-9].conf是carp-child,用於緩存文件;squid7[0-9].conf是carp-parent,用於決定URL在集羣中的分佈(與腳本中PARENT_CONF、CHILD_CONF的定義一致)。

附件爲腳本。望用得上的同仁多提寶貴意見!!!

#!/bin/bash
# Squid memory watchdog.
# Purpose:
#    restart a squid process when the system's available memory gets very low
#    check the health of every configured squid process

MAIL="[email protected]"        # admin mailbox; placeholder, set your own address
MAIL_FROM="[email protected]"   # sender address; placeholder, set your own address
CHECK_INTERVAL="3s"        # delay between two checks (passed to sleep)
THRESHOLD_LOW_MEM="90"            # MB, low-memory threshold; must be greater than vm.min_free_kbytes
THRESHOLD_CHILD_MAX_MEM="200"    # MB, a child holding more memory than this is restarted
THRESHOLD_EMERG="100"            # MB, restart threshold for children while LEVEL="emerg"
LEVEL="normal"    # memory level: "normal", switched to "emerg" while available memory is low
LOG="$0.log"

# Discover the instance config files: squid70-79 are the parents,
# squid80-89 are the children.
CONFIG_LOCATION="/etc/squid/"

# Print the file names inside CONFIG_LOCATION that match glob pattern $1,
# separated by spaces. Prints nothing when the directory can not be entered
# or when nothing matches, so a literal pattern such as "squid7[0-9].conf"
# never ends up in the instance lists.
list_squid_conf() {
    (
        cd "$CONFIG_LOCATION" 2>/dev/null || {
            echo "warning: can not enter $CONFIG_LOCATION" >&2
            exit 0
        }
        shopt -s nullglob
        # $1 is intentionally unquoted: the glob has to expand here.
        echo $1
    )
}
PARENT_CONF=$(list_squid_conf 'squid7[0-9].conf')
CHILD_CONF=$(list_squid_conf 'squid8[0-9].conf')
ALL_CONF="$PARENT_CONF $CHILD_CONF"

# Exempt this watchdog from the kernel OOM killer. oom_score_adj (-1000) is
# the current interface; oom_adj (-17) is its deprecated predecessor and is
# only tried as a fallback for old kernels. Lowering the value normally
# requires root (CAP_SYS_RESOURCE), so a failure is reported, not fatal.
{ echo "-1000" > /proc/self/oom_score_adj; } 2>/dev/null ||
    { echo "-17" > /proc/self/oom_adj; } 2>/dev/null ||
    echo "warning: can not disable OOM killing for $0" >&2

# Print the memory that is free or cheaply reclaimable, in whole MB.
# The value is MemFree + Buffers + Cached taken from a meminfo file, which
# does not depend on the column layout of free(1) (that layout differs
# between procps versions).
# Arguments: $1 - meminfo file to read (optional, default /proc/meminfo)
# Outputs:   one integer (MB) on stdout; nothing when the file is unusable
# Returns:   0 on success, non-zero when the file can not be read or parsed
all_free_mem() {
    local meminfo=${1:-/proc/meminfo}

    [[ -r $meminfo ]] || return 1
    awk '
        /^MemFree:/ { free_kb = $2; seen = 1 }
        /^Buffers:/ { buffers_kb = $2 }
        /^Cached:/  { cached_kb = $2 }
        END {
            # Without a MemFree line the input is not a meminfo file:
            # print nothing instead of a misleading 0.
            if (!seen) exit 1
            printf "%d\n", (free_kb + buffers_kb + cached_kb) / 1024
        }
    ' "$meminfo"
}
# Measure the resident memory of the squid instance described by $CONF.
# Globals read:    CONF (config file name, e.g. squid80.conf), LOG
# Globals written: INSTANCE, PID_FILE, PID_FROM_FILE,
#                  RSS (MB; 0 whenever the process can not be measured)
# Arguments: $1 - directory holding the pid files (optional, default /var/run)
#            $2 - procfs mount point (optional, default /proc)
# Returns:   0 when RSS was measured, 1 when the pid file is missing or
#            does not point at a squid process
squid_instance_mem() {
    local run_dir=${1:-/var/run}
    local proc_dir=${2:-/proc}

    INSTANCE=${CONF%.conf}
    PID_FILE="$run_dir/$INSTANCE.pid"
    PID_FROM_FILE=""
    # Callers compare RSS numerically, so it must never be left empty.
    RSS=0

    if [[ ! -e $PID_FILE ]]; then
        echo "$INSTANCE pid file $PID_FILE does not exist" >> "$LOG"
        return 1
    fi
    PID_FROM_FILE=$(cat "$PID_FILE" 2>/dev/null)

    # The pid must be numeric and the process behind it must be named squid;
    # otherwise the file is stale and the memory of an unrelated process
    # would be reported.
    if [[ ! $PID_FROM_FILE =~ ^[0-9]+$ ]] ||
        ! grep -q squid "$proc_dir/$PID_FROM_FILE/comm" 2>/dev/null; then
        echo "$INSTANCE pid file not match to squid" >> "$LOG"
        return 1
    fi

    # VmRSS is reported in kB; convert to whole MB.
    RSS=$(awk '/^VmRSS:/ {printf "%d\n", $2/1024}' \
        "$proc_dir/$PID_FROM_FILE/status" 2>/dev/null)
    [[ $RSS =~ ^[0-9]+$ ]] || RSS=0
}
# Check every configured instance and repair what is broken:
#   - no running process           -> remove stale pid/state files, start it
#   - pid file missing or empty    -> write the running pid into it
#   - pid file disagrees with ps   -> rewrite it with the running pid
# Globals read:    ALL_CONF, LOG
# Globals written: CONF, INSTANCE, PID_FILE, STATE_FILE, PID_FROM_FILE,
#                  PID_RUNNING, process_state (unset again on return)
# Arguments: $1 - directory holding the pid files (optional, default /var/run)
#            $2 - directory holding the init-system state files
#                 (optional, default /var/lib/init.d/started)
keep_processes_health() {
    local run_dir=${1:-/var/run}
    local state_dir=${2:-/var/lib/init.d/started}
    local -a running_pids
    local pid

    for CONF in $ALL_CONF; do
        INSTANCE=${CONF%.conf}
        PID_FILE="$run_dir/$INSTANCE.pid"
        STATE_FILE="$state_dir/$INSTANCE"
        PID_FROM_FILE=""
        [[ -e $PID_FILE ]] && PID_FROM_FILE=$(cat "$PID_FILE" 2>/dev/null)

        # Processes owned by a user starting with "squid" whose command line
        # contains the config name. The name is matched literally (index),
        # not as a regex, so the "." in it can not match other characters.
        running_pids=($(ps axo user,pid,cmd |
            awk -v conf="$CONF" '/^squid/ && index($0, conf) {print $2}'))

        # Several processes may match; prefer the one the pid file already
        # names, otherwise fall back to the first match.
        PID_RUNNING=${running_pids[0]:-}
        for pid in "${running_pids[@]}"; do
            if [[ $pid == "$PID_FROM_FILE" ]]; then
                PID_RUNNING=$pid
                break
            fi
        done

        if [[ -z $PID_RUNNING ]]; then
            # Process does not exist: start it.
            process_state="not_running"
            echo "$(date +%F\ %T) $INSTANCE state is $process_state, restarted" >> "$LOG"
            [[ -e $PID_FILE ]] && /bin/rm -f "$PID_FILE"
            [[ -e $STATE_FILE ]] && /bin/rm -f "$STATE_FILE"
            /etc/init.d/squid_multi_instance start "${INSTANCE#squid}" >/dev/null 2>&1
            notify_admin &
        elif [[ -z $PID_FROM_FILE ]]; then
            # Pid file is missing or empty: repair it.
            process_state="bad_pid_file"
            if echo "$PID_RUNNING" > "$PID_FILE"; then
                process_state="good"
                echo "fixed pid file of $INSTANCE at $(date +%F\ %T)" >> "$LOG"
            else
                echo "can not write $PID_FILE" >> "$LOG"
                process_state="pid_file_not_writeable"
                notify_admin &
            fi
        elif [[ $PID_FROM_FILE != "$PID_RUNNING" ]]; then
            # Pid file names a pid that is not one of the running processes.
            process_state="pid_not_equal"
            if echo "$PID_RUNNING" > "$PID_FILE"; then
                echo "fixed $process_state of $INSTANCE at $(date +%F\ %T)" >> "$LOG"
                notify_admin &
            else
                echo "can not write $PID_FILE" >> "$LOG"
                process_state="$process_state pid_file_not_writeable"
                notify_admin &
            fi
        else
            # Pid file and running process agree: healthy.
            process_state="good"
        fi
    done
    unset process_state
}
# Restart the instance named by the global CONF through the init script
# and mail the administrator in the background.
# Globals read:    CONF
# Globals written: INSTANCE (CONF without its ".conf" suffix)
restart_process() {
    local init_script=/etc/init.d/squid_multi_instance

    INSTANCE="${CONF%.conf}"
    # The init script receives the instance name without the "squid" prefix.
    "$init_script" restart ${INSTANCE#squid} &>/dev/null
    notify_admin &
}
# Restart the parent instance that currently holds the most memory.
# The comparison is numeric, and the candidate list is rebuilt on every
# call so measurements from earlier calls can not influence the choice.
# Globals read:    PARENT_CONF, LOG, RSS (set by squid_instance_mem)
# Globals written: CONF (left pointing at the restarted instance)
pick_and_restart_parent() {
    local max_rss=-1 max_conf="" rss

    for CONF in $PARENT_CONF; do
        squid_instance_mem
        rss=$RSS
        # An instance that could not be measured counts as 0 MB.
        [[ $rss =~ ^[0-9]+$ ]] || rss=0
        # ">=" keeps the last instance among equals.
        if (( 10#$rss >= max_rss )); then
            max_rss=$((10#$rss))
            max_conf=$CONF
        fi
    done

    if [[ -n $max_conf ]]; then
        CONF=$max_conf
        restart_process
        echo "restarted parent ${CONF%.conf} at $(date +%F\ %T)" >> "$LOG"
    else
        # No parent configured at all.
        echo "unknow error in pick_and_restart_parent" >> "$LOG"
    fi
}
# Restart every child instance that holds too much memory.
# Normally a child is restarted above THRESHOLD_CHILD_MAX_MEM; while
# LEVEL is "emerg" the lower THRESHOLD_EMERG applies as well. Each child is
# restarted at most once per call and every restart is logged.
# Globals read:    CHILD_CONF, LEVEL, THRESHOLD_CHILD_MAX_MEM,
#                  THRESHOLD_EMERG, LOG, RSS (set by squid_instance_mem)
# Globals written: CONF, process_state (reason of the last restart)
pick_and_restart_child() {
    local rss

    for CONF in $CHILD_CONF; do
        squid_instance_mem
        rss=$RSS
        # An instance that could not be measured counts as 0 MB.
        [[ $rss =~ ^[0-9]+$ ]] || rss=0

        if (( 10#$rss > THRESHOLD_CHILD_MAX_MEM )); then
            process_state="over_THRESHOLD_CHILD_MAX_MEM"
        elif [[ $LEVEL == "emerg" ]] && (( 10#$rss > THRESHOLD_EMERG )); then
            process_state="over_THRESHOLD_EMERG"
        else
            continue
        fi
        echo "$process_state restarted child ${CONF%.conf} at $(date +%F\ %T)" >> "$LOG"
        restart_process
    done
    return 0
}
# Escalating reaction to low memory:
#   1. restart the children that are over their threshold
#   2. if memory is still low, restart the biggest parent
#   3. if memory is still low, restart all children at once
# An unreadable memory figure stops the escalation instead of being
# treated as 0 MB, so a failing measurement can not restart everything.
# Globals read: THRESHOLD_LOW_MEM, LOG
# Returns:      0 normally, 1 when free memory could not be determined
pick_and_restart_one() {
    local free_mb

    # Children first.
    pick_and_restart_child

    # Memory still low: restart a parent.
    free_mb=$(all_free_mem)
    if [[ ! $free_mb =~ ^[0-9]+$ ]]; then
        echo "can not read free memory at $(date +%F\ %T), escalation skipped" >> "$LOG"
        return 1
    fi
    (( 10#$free_mb < THRESHOLD_LOW_MEM )) || return 0
    pick_and_restart_parent

    # Memory still low: restart all children.
    free_mb=$(all_free_mem)
    if [[ ! $free_mb =~ ^[0-9]+$ ]]; then
        echo "can not read free memory at $(date +%F\ %T), escalation skipped" >> "$LOG"
        return 1
    fi
    (( 10#$free_mb < THRESHOLD_LOW_MEM )) || return 0

    # "child" is the pseudo instance name handed to restart_process here;
    # local keeps the caller's CONF untouched.
    local CONF="child"
    echo "restarted all children at $(date +%F\ %T)" >> "$LOG"
    restart_process
}
# Mail the administrator about the instance that was just handled.
# sendmail is run with -t so that the recipient is taken from the "To:"
# header (no recipient is given on the command line) and with -f to set
# the sender address.
# Globals read: MAIL, MAIL_FROM, INSTANCE, process_state
notify_admin() {
    local host
    host=$(hostname)

    # The single "." line ends the message for sendmail.
    sendmail -t -f "$MAIL_FROM" <<EOF
To: $MAIL
From: $MAIL_FROM
Subject: $INSTANCE on $host restarted

重啓過了 $host 上的 $INSTANCE
進程 $INSTANCE 的最後狀態爲: $process_state

當前系統總剩餘內存爲: $(all_free_mem)
.
EOF
}
# Main loop: one health/memory check per CHECK_INTERVAL, forever.
while true; do
    keep_processes_health

    # Children over THRESHOLD_CHILD_MAX_MEM are restarted right away,
    # without waiting for the total memory to run low.
    pick_and_restart_child

    free_mb=$(all_free_mem)
    if [[ $free_mb =~ ^[0-9]+$ ]] && (( 10#$free_mb < THRESHOLD_LOW_MEM )); then
        LEVEL="emerg"
        echo "low memory at $(date +%F\ %T)" >> "$LOG"
        pick_and_restart_one
    else
        # Also taken when the memory figure could not be read: an unknown
        # value must not trigger emergency restarts.
        LEVEL="normal"
    fi

    # Sleep on every pass, the low-memory one included: restarted processes
    # need time to release memory, and looping without a pause would restart
    # them again and flood the administrator with mail.
    sleep "$CHECK_INTERVAL"
done
# vim: set sw=4 ts=4:

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章