創建集羣
集羣各個節點名稱
192.168.100.101 Node1
192.168.100.102 Node2
192.168.100.103 Node3
192.168.100.104 Node4
配置各個節點的互信
在各個節點上添加用戶名爲“mongo”,密碼爲“1”的用戶。
在Node1上執行以下腳本:
#!/bin/bash
# Set up passwordless SSH (mutual trust) for one user across Node1..Node4.
# Run this on Node1. Bash is required: the C-style for ((...)) loops below
# are not available in POSIX sh.
# WARNING: this wipes any existing ~/.ssh directory on every node.
UserName="mongo"

## 1 Remove the old .ssh directory and generate a fresh RSA key pair on every node
rm -rf ~/.ssh
# -f / -P "" keep key generation non-interactive, same as on the remote nodes.
ssh-keygen -t rsa -f ~/.ssh/id_rsa -P ""
for ((i=2; i<=4; i++))
do
    ssh "$UserName@Node$i" 'rm -rf ~/.ssh; ssh-keygen -t rsa -f ~/.ssh/id_rsa -P ""'
done

## 2 Collect every node's public key into the local authorized_keys file
cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys
for ((i=2; i<=4; i++))
do
    # The remote command is quoted so that ~ is expanded on the remote host;
    # the >> redirection is outside the quotes and therefore appends locally.
    ssh "$UserName@Node$i" 'cat ~/.ssh/id_rsa.pub' >> ~/.ssh/authorized_keys
done

## 3 Dispatch authorized_keys to the other machines and restrict its permissions
chmod 600 ~/.ssh/authorized_keys
for ((i=2; i<=4; i++))
do
    scp ~/.ssh/authorized_keys "$UserName@Node$i:~/.ssh/"
    ssh "$UserName@Node$i" 'chmod 600 ~/.ssh/authorized_keys'
done
下載和處理測試數據
在中國地震信息網下載統計數據,下載地址:http://www.csi.ac.cn/publish/main/1/100245/index.html。
將Excel中多餘的數據去掉,並保存爲CSV格式(首行爲列名,後文用到的列有sn、jing、wei、lev),具體格式如下:
用文本編輯器打開CSV格式的文件,另存爲UTF-8編碼,並命名爲dz-data-utf8.csv。
安裝Mongodb
下載mongodb,官網地址:www.mongodb.org
解壓mongodb:
[mongo@Node1 software]$ tar xvf mongodb-linux-x86_64-2.4.2.tar
mongodb存放路徑:
[mongo@Node1 mongodb2.4.2]$ pwd
/home/mongo/src/mongodb2.4.2
將Mongodb分發到Node2,Node3,Node4
scp -r src node2:~/
scp -r src node3:~/
scp -r src node4:~/
使用Mongodb
在節點Node3和Node4上創建複製集(Replication)
在節點Node3上執行以下操作
//啓動複製集集羣
[mongo@Node3 mongodb2.4.2]$ mkdir -p ~/m0 ~/m1 ~/m2 ~/mlog
[mongo@Node3 mongodb2.4.2]$ ./bin/mongod --dbpath ~/m0 --logpath ~/mlog/m17.log --logappend --port 27017 --fork --replSet rs3 --smallfiles
[mongo@Node3 mongodb2.4.2]$ ./bin/mongod --dbpath ~/m1 --logpath ~/mlog/m18.log --logappend --port 27018 --fork --replSet rs3 --smallfiles
[mongo@Node3 mongodb2.4.2]$ ./bin/mongod --dbpath ~/m2 --logpath ~/mlog/m19.log --logappend --port 27019 --fork --replSet rs3 --smallfiles
//創建複製集
[mongo@Node3 mongodb2.4.2]$ ./bin/mongo
> use admin
> rsconf = {
_id:'rs3',
members:[
{_id:0,host:'192.168.100.103:27017'},
{_id:1,host:'192.168.100.103:27018'},
{_id:2,host:'192.168.100.103:27019'},
]
}
> rs.initiate(rsconf)
在節點Node4上執行以下操作
[mongo@Node4 mongodb2.4.2]$ mkdir -p ~/m0 ~/m1 ~/m2 ~/mlog
[mongo@Node4 mongodb2.4.2]$ ./bin/mongod --dbpath ~/m0 --logpath ~/mlog/m17.log --logappend --port 27017 --fork --replSet rs4 --smallfiles
[mongo@Node4 mongodb2.4.2]$ ./bin/mongod --dbpath ~/m1 --logpath ~/mlog/m18.log --logappend --port 27018 --fork --replSet rs4 --smallfiles
[mongo@Node4 mongodb2.4.2]$ ./bin/mongod --dbpath ~/m2 --logpath ~/mlog/m19.log --logappend --port 27019 --fork --replSet rs4 --smallfiles
//創建複製集
[mongo@Node4 mongodb2.4.2]$ ./bin/mongo
> use admin
> rsconf = {
_id:'rs4',
members:[
{_id:0,host:'192.168.100.104:27017'},
{_id:1,host:'192.168.100.104:27018'},
{_id:2,host:'192.168.100.104:27019'},
]
}
> rs.initiate(rsconf)
創建分片(Shard)
創建 configsvr
[mongo@Node2 mongodb2.4.2]$ mkdir -p /home/mongo/m20 /home/mongo/mlog
[mongo@Node2 mongodb2.4.2]$ ./bin/mongod --dbpath=/home/mongo/m20 --logpath=/home/mongo/mlog/m20.log --fork --configsvr --port=27020
啓動mongos
[mongo@Node2 mongodb2.4.2]$ ./bin/mongos --logpath=/home/mongo/m30.log --configdb=192.168.100.102:27020 --fork
連接mongos並查看分片狀態:
[mongo@Node2 mongodb2.4.2]$ ./bin/mongo --port 27017
mongos> sh.status()
向分片中添加複製集
//添加複製集
mongos> sh.addShard('rs3/192.168.100.103:27017')
{ "shardAdded" : "rs3", "ok" : 1 }
mongos> sh.addShard('rs4/192.168.100.104:27017')
{ "shardAdded" : "rs4", "ok" : 1 }
mongos>
//查看狀態
mongos> sh.status();
--- Sharding Status ---
sharding version: {
"_id" : 1,
"version" : 3,
"minCompatibleVersion" : 3,
"currentVersion" : 4,
"clusterId" : ObjectId("56dab6daf6899ec943eebda3")
}
shards:
{ "_id" : "rs3", "host" : "rs3/192.168.100.103:27017,192.168.100.103:27018,192.168.100.103:27019" }
{ "_id" : "rs4", "host" : "rs4/192.168.100.104:27017,192.168.100.104:27018,192.168.100.104:27019" }
databases:
{ "_id" : "admin", "partitioned" : false, "primary" : "config" }
//將test庫分片
mongos> sh.enableSharding('test')
{ "ok" : 1 }
// 將集合dz按照sn進行分片
mongos> sh.shardCollection('test.dz', {sn:1})
{ "collectionsharded" : "test.dz", "ok" : 1 }
// 制定分片規則:預先在sn爲1000、2000、…、50000處切分數據塊(chunk)
mongos> for(var i=1; i<=50; i++){sh.splitAt('test.dz',{sn:i*1000})}
{ "ok" : 1 }
// 導入數據
./bin/mongoimport -h 192.168.100.102 -d test -c dz --type csv --file ./dz-data-utf8.csv --headerline
使用MapReduce進行統計
首先連接上mongos
mongos>
//統計各個區塊地震出現的次數
//統計各個地方的地震次數
// Map step of the per-block count job.
// Invoked by MongoDB with `this` bound to one document of the collection.
// Documents whose `jing` or `wei` field is negative are ignored; every other
// document is assigned to a 5x5 grid cell (each coordinate rounded down to a
// multiple of 5) and contributes a count of 1 under the key "<jing>:<wei>".
// NOTE(review): jing/wei are presumably longitude/latitude - confirm against the CSV header.
var map = function(){
    var isNegative = this.jing < 0 || this.wei < 0;
    if (!isNegative) {
        var cellJing = 5 * Math.floor(this.jing / 5);
        var cellWei = 5 * Math.floor(this.wei / 5);
        emit(cellJing + ':' + cellWei, 1);
    }
};
// Reduce step of the per-block count job.
// `block` is the grid-cell key and `values` the counts collected for it;
// the result is their total, computed with the mongo shell helper Array.sum.
var reduce = function(block, values){
    var total = Array.sum(values);
    return total;
};
// Run the count job on the dz collection; the output is written to the
// 'res' collection named in the `out` option.
db.dz.mapReduce(map, reduce, {out:'res'})
//統計各個區塊的地震平均級數
// Average of the `lev` field per 5x5 grid cell.
// NOTE(review): `lev` is presumably the earthquake magnitude - confirm against the CSV header.
//
// An average must not be computed inside reduce: MongoDB may call reduce
// several times for the same key, feeding earlier reduce outputs back in
// (and a sharded cluster merges per-shard partial results), which would
// produce an average of averages. Instead, a {sum, count} pair is carried
// through map and reduce, and the division happens once in finalize.

// Map step: skip documents with a negative jing or wei, otherwise emit the
// document's lev as a one-element partial aggregate for its grid cell.
var map = function(){
    if (this.jing < 0 || this.wei < 0){
        return;
    }
    var j = Math.floor(this.jing/5)*5;
    var w = Math.floor(this.wei/5)*5;
    var block = j+':'+w;
    emit(block, {sum: this.lev, count: 1});
};

// Reduce step: merge partial aggregates. The result has the same shape as
// the emitted values, so it can safely be reduced again.
var reduce = function(block, values){
    var acc = {sum: 0, count: 0};
    for (var i = 0; i < values.length; i++){
        acc.sum += values[i].sum;
        acc.count += values[i].count;
    }
    return acc;
};

// Finalize step: turn the merged aggregate into the average, so that
// `value` in the output collection is a plain number.
var finalize = function(block, value){
    return value.sum / value.count;
};

// Run the job; the output is written to the 'res' collection.
db.dz.mapReduce(map, reduce, {out:'res', finalize: finalize})
//查看統計結果
db.res.find().sort({value:-1})
各個區塊地震發生的次數統計結果:
各個區塊地震平均級數的統計結果: