Implementing Matrix Transposition with MapReduce

 In large-scale data processing we often reach for Hadoop's distributed processing framework. Matrix transposition is a core step in the MapReduce matrix-multiplication algorithm, and matrix multiplication itself shows up in the math behind many other algorithms. This post walks through implementing matrix transposition with MapReduce.
 First, set up a Hadoop development environment in eclipse (see the earlier setup post for details). Create a directory "matrix" under the HDFS root and upload a matrix file written in the expected format, sketched below.
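The original screenshot of the input file is not reproduced here. The following is a minimal sample reconstructed from the format the Mapper below expects: one matrix row per line, consisting of the row number, a tab, and comma-separated column_value pairs. The first line is taken from the Mapper's own comment; the other two rows are made-up values for illustration.

1	1_0,2_3,3_-1,4_2,5_-3
2	1_1,2_3,3_5,4_-2,5_0
3	1_2,2_-1,3_4,4_0,5_1

So "3_-1" on the first line means the element at row 1, column 3 is -1.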

Write the Mapper class:
package me.timlong.step1;

import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class Mapper1 extends Mapper<LongWritable, Text, Text, Text> {
	
	private Text outKey = new Text();
	private Text outValue = new Text();
	
	/*
	 * key   : the byte offset of the line in the input file (not the row number)
	 * value : one line of the matrix file, e.g. "1	1_0,2_3,3_-1,4_2,5_-3"
	 */
	@Override
	protected void map(LongWritable key, Text value, Context context)
			throws IOException, InterruptedException {
		String[] rowAndLine = value.toString().split("\t");
		
		//row number of the matrix
		String row = rowAndLine[0];
		String[] lines = rowAndLine[1].split(",");
		
		//e.g. ["1_0","2_3","3_-1","4_2","5_-3"]
		for(int i = 0; i < lines.length; i++) {
			String[] columnAndValue = lines[i].split("_");
			String column = columnAndValue[0];
			String valueStr = columnAndValue[1];
			
			//key: column number  value: rowNumber_value
			outKey.set(column);
			outValue.set(row + "_" + valueStr);
			context.write(outKey, outValue);
		}
	}
}
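To see concretely what the map step emits, here is a small standalone trace of Mapper1's logic. It is illustrative only and not part of the job; the class name TransposeMapTrace is made up, and the sample line is the one from the Mapper comment above.

public class TransposeMapTrace {
	public static void main(String[] args) {
		//sample input line from the Mapper comment
		String line = "1\t1_0,2_3,3_-1,4_2,5_-3";
		String[] rowAndLine = line.split("\t");
		String row = rowAndLine[0];
		for(String cell : rowAndLine[1].split(",")) {
			String[] columnAndValue = cell.split("_");
			//the mapper emits key = column number, value = rowNumber_value
			System.out.println(columnAndValue[0] + "\t" + row + "_" + columnAndValue[1]);
		}
	}
}

Running it prints pairs such as "1	1_0" and "2	1_3". The shuffle then groups these pairs by key, so each reduce call receives exactly one column of the original matrix, which is one row of the transpose.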
Write the Reducer class:
package me.timlong.step1;

import java.io.IOException;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

public class Reducer1 extends Reducer<Text, Text, Text, Text> {

	private Text outKey = new Text();
	private Text outValue = new Text();
	
	//key: column number  values: [rowNumber_value, rowNumber_value, ...]
	@Override
	protected void reduce(Text key, Iterable<Text> values, Context context)
			throws IOException, InterruptedException {
		StringBuilder sb = new StringBuilder();
		
		for(Text text : values) {
			//text : rowNumber_value
			sb.append(text.toString()).append(",");
		}
		//drop the trailing comma; guard against an empty values list
		String line = sb.length() > 0 ? sb.substring(0, sb.length() - 1) : "";
		outKey.set(key);
		outValue.set(line);
		context.write(outKey, outValue);		
	}	
}
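A matching trace of one reduce call, again standalone and illustrative only (the class name and the two-row sample values are hypothetical):

import java.util.Arrays;
import java.util.List;

public class TransposeReduceTrace {
	public static void main(String[] args) {
		String key = "3"; //a column number of the original matrix
		List<String> values = Arrays.asList("1_-1", "2_5"); //hypothetical rowNumber_value pairs
		StringBuilder sb = new StringBuilder();
		for(String v : values) {
			sb.append(v).append(",");
		}
		String line = sb.length() > 0 ? sb.substring(0, sb.length() - 1) : "";
		//prints "3	1_-1,2_5": row 3 of the transpose, in the same format as the input file
		System.out.println(key + "\t" + line);
	}
}

One caveat: Hadoop does not guarantee the order of values within a reduce call, so the rowNumber_value entries of a transposed row may come out in any order. Because each entry carries its own row number, a downstream multiplication step can still match elements correctly.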

Write the driver class; it wraps the job setup in a run() method that main() calls:
package me.timlong.step1;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class MR1 {

	//input file path on HDFS
	private static String inPath = "/matrix/step1_input/matrix2.txt";
	//output directory on HDFS
	private static String outPath = "/matrix/step1_output";
	//HDFS address, change this to your own NameNode
	private static String hdfs = "hdfs://10.255.248.61:9000";

	public int run() {
		try {
			//create the job configuration
			Configuration conf = new Configuration();
			//point the job at HDFS
			conf.set("fs.defaultFS", hdfs);
			//create a job instance
			Job job = Job.getInstance(conf, "step1");

			//set the job's main class
			job.setJarByClass(MR1.class);
			//set the Mapper and Reducer classes
			job.setMapperClass(Mapper1.class);
			job.setReducerClass(Reducer1.class);

			//mapper output key/value types
			job.setMapOutputKeyClass(Text.class);
			job.setMapOutputValueClass(Text.class);

			//reducer output key/value types
			job.setOutputKeyClass(Text.class);
			job.setOutputValueClass(Text.class);

			FileSystem fs = FileSystem.get(conf);
			//set the input path, failing fast if it does not exist
			Path inputPath = new Path(inPath);
			if(fs.exists(inputPath)) {
				FileInputFormat.addInputPath(job, inputPath);
			} else {
				System.err.println("input path does not exist: " + inPath);
				return -1;
			}

			//delete any leftover output directory, or the job will refuse to start
			Path outputPath = new Path(outPath);
			fs.delete(outputPath, true);

			FileOutputFormat.setOutputPath(job, outputPath);

			return job.waitForCompletion(true) ? 1 : -1;
		} catch (IOException e) {
			e.printStackTrace();
		} catch (ClassNotFoundException e) {
			e.printStackTrace();
		} catch (InterruptedException e) {
			e.printStackTrace();
		}
		return -1;
	}

	public static void main(String[] args) {
		int result = new MR1().run();
		if(1 == result) {
			System.out.println("step1 finished successfully");
		} else {
			System.out.println("step1 failed");
		}
	}
}
After the job succeeds, a folder "step1_output" containing the transposed matrix as a text file appears under the matrix directory. Try it yourself!
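For reference, a typical shell sequence for uploading the input and running the job might look like this (the jar name matrix-step1.jar and the local file name matrix2.txt are placeholders):

hadoop fs -mkdir -p /matrix/step1_input
hadoop fs -put matrix2.txt /matrix/step1_input/
hadoop jar matrix-step1.jar me.timlong.step1.MR1
hadoop fs -cat /matrix/step1_output/part-r-00000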
Hoping to improve a little every day. See you next time!