happybase操作HBase語法:
import happybase

# Connect to HBase through its Thrift gateway (default Thrift port 9090).
# NOTE: the original line used curly "smart" quotes (’…’), which are a
# SyntaxError in Python — replaced with plain ASCII quotes.
conn = happybase.Connection(
    host='localhost',
    port=9090,
    timeout=None,
    autoconnect=True,
    table_prefix=None,
    table_prefix_separator=b'_',
    compat='0.98',
    transport='buffered',
    protocol='binary',
)

# Create a table with two column families, each keeping up to 4 cell versions.
# FIX: the happybase keyword is `max_versions` (plural); the original
# `max_version` is silently wrong / rejected depending on version.
conn.create_table(
    'shop',
    {
        'interfaceInfo': dict(max_versions=4),
        'inputInfo': dict(max_versions=4),
    }
)

# Insert data: a batch context manager sends all mutations in one round trip
# when the `with` block exits.
table = conn.table("shop")
with table.batch() as bat:
    # FIX: the original misspelled one qualifier's family as
    # 'interfaseInfo:...' — a non-existent column family, which would make
    # the whole batch fail on commit.
    bat.put('0001', {'interfaceInfo:inter_show': 'HDM1',
                     'interfaceInfo:inter_network': '10Mbps',
                     'interfaceInfo:inter_three': '1個',
                     'interfaceInfo:inter_Type-c': '1個'})
    # Same row key '0001': these cells land in the other column family.
    # NOTE(review): 'input_tow' looks like a typo for 'input_two', but it is
    # a stored qualifier name — left unchanged to preserve the data layout.
    bat.put('0001', {'inputInfo:input_one': '有指點杆',
                     'inputInfo:input_tow': '全尺寸鍵盤',
                     'inputInfo:input_three': '多點觸控',
                     'inputInfo:input_four': '多點觸控'})
MapReduce語法(以變壓器案例爲例)
mapper.py
#! /usr/bin/python3
# Hadoop Streaming mapper: the framework sorts mapper output by key
# before it reaches the reducer, so we only need to emit "<key>\t1".
import sys


def mapper(line):
    """Classify one CSV record by the capacity in its 3rd field and emit 'cat\\t1'."""
    capacity = float(line.split(",")[2])
    if capacity <= 630.00:
        category = "mini"
    elif capacity <= 6300:
        category = "mid"
    else:
        category = "over"
    print("%s\t%s" % (category, 1))


def main():
    """Feed stdin records to mapper(); stop at the first line starting with 'child'."""
    for raw in sys.stdin:
        raw = raw.strip()
        if raw.startswith('child'):
            break
        mapper(raw)


if __name__ == '__main__':
    main()
reduce.py
#! /usr/bin/python3
# Hadoop Streaming reducer: input arrives sorted by key, so equal keys are
# adjacent and can be aggregated in a single pass.
import sys


def reducer(k, values):
    """Emit one aggregated line 'k:\\t:sum(values)' for a completed key group."""
    print("%s:\t:%s" % (k, sum(values)))


def main():
    """Group adjacent equal keys from stdin and emit one summed line per key."""
    current_key = None
    values = []
    for line in sys.stdin:
        line = line.strip()
        try:
            akey, avalue = line.split('\t')
        except ValueError:
            # FIX: was a bare `except:` — narrow it to the split failure
            # (wrong field count) so real errors are not silently swallowed.
            continue
        if current_key == akey:
            values.append(int(avalue))
        else:
            # Key changed: flush the previous group, then start the new one.
            if current_key:
                reducer(current_key, values)
            values = [int(avalue)]
            current_key = akey
    # Flush the final group. FIX: the original `if current_key == akey`
    # also fired on empty input (None == None) and printed a spurious
    # "None:\t:0" line; guard on `is not None` instead.
    if current_key is not None:
        reducer(current_key, values)


if __name__ == '__main__':
    main()
Spark語法
兩種方式創建rdd
# 1. Initialize the SparkContext — the entry point of every Spark program.
#    'Simple App' is the application name; pick something descriptive.
sc = SparkContext("local", "Simple App")

# Two ways to create an RDD:
# 2. From an in-memory list (here the integers 1..5) ...
data = [i for i in range(1, 6)]
# 3. ... parallelized into an RDD by the SparkContext.
rdd = sc.parallelize(data)

# Or from an external file. FIX: `textFile` is a method of SparkContext;
# the original called a bare `textFile(...)`, which raises NameError.
rdd = sc.textFile("/root/wordcount.txt")
接着就是用所需要的算子來完成任務
算子部分edu的網址如下:
https://www.educoder.net/shixuns/imf67y2q/challenges