1.提取只存在於源端或只存在於目標端的表名,分別在前兩個區域顯示,提示alone
2.判斷剩下共有的表的數據差距,若超出閾值,則在第三個區域進行顯示,提示abnormal
3.判斷剩下共有的表的數據差距,若相等或在閾值之內,則認爲正常進行輸出,提示normal
用法:bash showdiff A.txt B.txt 10(第三個參數爲閾值)
A.txt(源端,每行一條記錄):
名稱 數值
B.txt(目標端,每行一條記錄):
名稱 數值
輸出結果:
table in this section are alone in src:have no one
table in this section are alone in src:have no one
tables in this section are abnormal:
tables in this section are normal:
#!/bin/bash
#
# showdiff - compare per-table counts taken from a source and a destination.
#
# Usage: showdiff SRC_FILE DST_FILE THRESHOLD
#
#   SRC_FILE, DST_FILE  text files holding one "NAME COUNT" record per line,
#                       separated by blanks or tabs. Blank lines are ignored.
#                       If a name repeats within one file, the last
#                       occurrence is used. A missing or non-numeric COUNT
#                       is treated as 0.
#   THRESHOLD           non-negative number: the largest |src - dst| that is
#                       still reported as normal.
#
# The report is written to compare_results.txt in the current directory,
# replacing any previous report. It has four sections, in this order:
#   1. tables that exist only in SRC_FILE
#   2. tables that exist only in DST_FILE
#   3. shared tables whose counts differ by more than THRESHOLD (abnormal)
#   4. shared tables whose counts are equal or within THRESHOLD (normal)
# Rows in every section are ordered by table name (current locale).
#
# No intermediate files are created; everything runs in one pipeline.
#
# Exit status: 0 on success, 2 on bad arguments, 1 if processing fails.

#######################################
# Print a diagnostic message on stderr.
# Arguments: $* - message text
#######################################
showdiff_err() {
  printf 'showdiff: %s\n' "$*" >&2
}

#######################################
# Convert "NAME COUNT" lines on stdin into "NAME<TAB>TAG<TAB>COUNT" records.
# Arguments: $1 - tag identifying the side (S = source, D = destination)
# Outputs:   tagged records on stdout; blank input lines are dropped
#######################################
showdiff_tag() {
  awk -v tag="$1" 'NF { printf "%s\t%s\t%s\n", $1, tag, $2 }'
}

#######################################
# Validate the arguments, classify every table and write the report.
# Arguments: $1 - source file, $2 - destination file, $3 - threshold
# Outputs:   compare_results.txt in the current directory
# Returns:   0 on success, 2 on bad arguments, 1 if a pipeline stage fails
#######################################
main() {
  if (( $# != 3 )); then
    showdiff_err "usage: showdiff SRC_FILE DST_FILE THRESHOLD"
    return 2
  fi

  local src=$1 dst=$2 threshold=$3
  local report='compare_results.txt'
  local tab=$'\t'
  local number_re='^[0-9]+([.][0-9]+)?$'
  local input rc
  local -a stage_status

  # Validate everything before the old report is truncated.
  for input in "$src" "$dst"; do
    if [[ ! -r "$input" || -d "$input" ]]; then
      showdiff_err "cannot read input file: $input"
      return 2
    fi
  done
  if [[ ! "$threshold" =~ $number_re ]]; then
    showdiff_err "threshold must be a non-negative number: $threshold"
    return 2
  fi

  # Stage 1 tags each record with its side, stage 2 orders the records by
  # table name, stage 3 classifies each name and prints the four sections.
  # Names are looked up as exact array keys, never as regular expressions,
  # so a name such as "a.b" cannot match "axb" and no particular leading
  # whitespace is required in the input.
  {
    showdiff_tag S <"$src" && showdiff_tag D <"$dst"
  } | sort -s -t "$tab" -k1,1 | awk -v thres="$threshold" '
    # Print one "alone" section: a count header followed by one row per
    # table, or the fixed "have no one" line when the list is empty.
    function print_alone(side, names, n, counts,    i) {
      if (n == 0) {
        printf "table in this section are alone in %s:have no one\n\n", side
        return
      }
      printf "%d tables in this section are alone in %s:\n\n", n, side
      for (i = 1; i <= n; i++)
        printf "%50s\t%20d\n", names[i], counts[names[i]]
    }

    # Print name, source count, destination count and difference per table.
    function print_rows(names, n,    i, t) {
      for (i = 1; i <= n; i++) {
        t = names[i]
        printf "%50s\t%20d\t%20d\t%20d\n", t, src[t], dst[t], src[t] - dst[t]
      }
    }

    BEGIN {
      FS = "\t"
      limit = thres + 0
      # Force array type so empty lists can be passed to the helpers.
      split("", only_src); split("", only_dst)
      split("", normal);   split("", abnormal)
    }

    {
      if (!($1 in seen)) {
        seen[$1] = 1
        order[++total] = $1        # remember names in sorted order
      }
      if ($2 == "S")
        src[$1] = $3 + 0
      else
        dst[$1] = $3 + 0
    }

    END {
      for (i = 1; i <= total; i++) {
        name = order[i]
        if (!(name in dst)) {
          only_src[++n_src] = name
        } else if (!(name in src)) {
          only_dst[++n_dst] = name
        } else {
          delta = src[name] - dst[name]
          if (delta <= limit && delta >= -limit)
            normal[++n_ok] = name
          else
            abnormal[++n_bad] = name
        }
      }

      # The report starts with three empty lines.
      printf "\n\n\n"
      print_alone("src", only_src, n_src, src)
      print_alone("dst", only_dst, n_dst, dst)

      print "tables in this section are abnormal:"
      # NOTE: the "SROUUCE COUNT" spelling is kept as-is so that the report
      # header stays byte-compatible with earlier reports.
      printf "%50s\t%20s\t%20s\t%20s\n", "TABLE NAME", "SROUUCE COUNT", "DEST COUNT", "DIFFERENCE"
      print_rows(abnormal, n_bad)

      print "--------------------------------------------------------------------------------------------------------------------------------------"
      print "tables in this section are normal:"
      print_rows(normal, n_ok)
    }
  ' >"$report"
  stage_status=("${PIPESTATUS[@]}")

  for rc in "${stage_status[@]}"; do
    if (( rc != 0 )); then
      showdiff_err "failed while building $report"
      return 1
    fi
  done
  return 0
}

main "$@"