參考資料:
機器學習實戰
Mapper
'''
@version: 0.0.1
@Author: tqrs
@dev: python3 vscode
@Date: 2019-11-12 22:53:08
@LastEditTime: 2019-11-12 23:08:24
@FilePath: \\機器學習實戰\\15-MapReduce\\mrMeanMapper.py
@Description: 分佈式計算均值和方差的mapper
'''
import sys
import numpy as np
def read_input(file):
    """Yield each line of *file* with trailing whitespace removed.

    Args:
        file: Any iterable of lines, e.g. ``sys.stdin``.

    Yields:
        One item per input line, passed through ``rstrip()`` so the
        newline (and any other trailing whitespace) is dropped.
    """
    # The loop body must be indented under the def; with every line at
    # column 0 the function does not even parse.
    for line in file:
        yield line.rstrip()
def summarize_values(values):
    """Return ``(count, mean, mean_of_squares)`` for a sequence of numbers.

    Args:
        values: Sequence of numbers read by this mapper.

    Returns:
        A tuple ``(count, mean, mean_of_squares)``. An empty sequence gives
        ``(0, 0.0, 0.0)`` instead of NaN statistics, because a downstream
        weighted sum computes ``count * mean`` and ``0 * nan`` is ``nan``.
    """
    count = len(values)
    if count == 0:
        return 0, 0.0, 0.0
    # np.asarray replaces np.mat, which was removed in NumPy 2.0.
    data = np.asarray(values, dtype=float)
    return count, float(np.mean(data)), float(np.mean(np.square(data)))


if __name__ == "__main__":
    # Skip blank lines: float("") would raise ValueError. The list is no
    # longer bound to the name `input`, which shadowed the builtin.
    values = [float(line) for line in read_input(sys.stdin) if line.strip()]
    numInputs, mean, meanSq = summarize_values(values)
    # One tab-separated record per mapper: count, mean, mean of squares.
    print("%d\t%f\t%f" % (numInputs, mean, meanSq))
Reducer
'''
@version: 0.0.1
@Author: tqrs
@dev: python3 vscode
@Date: 2019-11-12 23:02:44
@LastEditTime: 2019-11-12 23:25:23
@FilePath: \\機器學習實戰\\15-MapReduce\\mrMeanReducer.py
@Description: 分佈式計算均值和方差的reducer
'''
import sys
def read_input(file):
    """Yield each line of *file* with trailing whitespace removed.

    Args:
        file: Any iterable of lines, e.g. ``sys.stdin``.

    Yields:
        One item per input line, passed through ``rstrip()`` so the
        newline (and any other trailing whitespace) is dropped.
    """
    # The loop body must be indented under the def; with every line at
    # column 0 the function does not even parse.
    for line in file:
        yield line.rstrip()
def combine_partials(records):
    """Merge per-mapper ``(count, mean, mean_of_squares)`` records.

    Args:
        records: Iterable of sequences whose first three items are a
            count, a mean and a mean of squares. Each item is passed
            through ``float``, so strings or numbers are accepted.

    Returns:
        A tuple ``(total_count, mean, mean_of_squares)`` weighted by each
        record's count, or ``(0.0, 0.0, 0.0)`` when no record carries any
        weight (previously a ZeroDivisionError).
    """
    cumN = 0.0
    cumVal = 0.0
    cumSumSq = 0.0
    for instance in records:
        nj = float(instance[0])
        if nj == 0:
            # A zero-count record contributes nothing, but if its
            # statistics are NaN then 0 * nan would poison the sums.
            continue
        cumN += nj
        cumVal += nj * float(instance[1])
        cumSumSq += nj * float(instance[2])
    if cumN == 0:
        return 0.0, 0.0, 0.0
    return cumN, cumVal / cumN, cumSumSq / cumN


if __name__ == "__main__":
    # Skip blank lines: they would split to [''] and fail in float('').
    mapperOut = [line.split('\t') for line in read_input(sys.stdin)
                 if line.strip()]
    cumN, mean, meanSq = combine_partials(mapperOut)
    print("%d\t%f\t%f" % (cumN, mean, meanSq))
    # Status message goes to stderr. The old print(sys.stderr, "...") wrote
    # the repr of the stream object plus the message to stdout instead.
    print("report: still alive", file=sys.stderr)