shell循環執行sparkSql程序
[hadoop@slave106 test]$ cat start.sh
# Collect all file paths under /yk/data/random into random.txt.
# `hdfs dfs -ls` prints the path in column 8; the "Found N items" header
# has fewer columns, so it yields an empty line that is filtered next.
hdfs dfs -ls /yk/data/random | awk '{print $8}' > random.txt
# Strip blank lines (sed reads the file directly; no `cat` pipe needed)
sed -e '/^$/d' random.txt > random
# Collect all file paths under /yk/data/sni into sni.txt the same way
hdfs dfs -ls /yk/data/sni | awk '{print $8}' > sni.txt
# Strip blank lines
sed -e '/^$/d' sni.txt > sni
# Pair the two lists line-by-line as "<random_path>,<sni_path>"
paste -d , random sni > filenames
# Cleanup of intermediate files is intentionally disabled for debugging
#rm random.txt
#rm sni.txt
# For each "<random_file>,<sni_file>" pair, submit one SparkSQL job.
# Read the file line-by-line with IFS=, so the two comma-separated
# fields land directly in $suiji and $sni (the original
# `arr=(${line...}` array split was truncated/broken and word-split
# unquoted input).
while IFS=, read -r suiji sni; do
  # Output directory name: the last 16 characters of the random path
  # (same as the original ${suiji:0-16} negative-offset expansion)
  filename=${suiji: -16}
  filename="hdfs://slave106/yk/out/1/${filename}"
  # Quote all arguments so paths survive spaces and glob characters
  ./sparksql "$suiji" "$sni" "$filename"
done < filenames
[hadoop@slave106 test]$ cat sparksql
# Wrapper around spark-submit for the SparkSqlJoinDemo job.
#   $1 - HDFS path of the "random" input file
#   $2 - HDFS path of the "sni" input file
#   $3 - HDFS output path for the join result
# Positional parameters are quoted so paths containing spaces or glob
# characters are passed through intact (they were unquoted before).
/home/hadoop/soft/spark/bin/spark-submit \
  --master spark://172.16.2.106:7077 \
  --class sql.cluster.SparkSqlJoinDemo \
  /home/hadoop/test/jars/SparkDemo-1.0-SNAPSHOT-jar-with-dependencies.jar \
  "$1" "$2" "$3"