MR題目:如何一次Map讀n行數據
輸入數據
{
"name":"ta",
"age":12,
"sex":1
}
{
"name":"la",
"age":13,
"sex":2
}
{
"name":"la",
"age":13,
"sex":2
}
{
"name":"la",
"age":13,
"sex":2
}
{
"name":"la",
"age":13,
"sex":2
}
{
"name":"la",
"age":13,
"sex":2
}
輸出數據
{"name":"la","age":13,"sex":2}
{"name":"la","age":13,"sex":2}
{"name":"la","age":13,"sex":2}
{"name":"la","age":13,"sex":2}
{"name":"la","age":13,"sex":2}
{"name":"ta","age":12,"sex":1}
運行記錄
Map-Reduce Framework
Map input records=6 //輸入文件共30行,而Map輸入記錄數只有6條,可見每次map確實讀入了五行
Map output records=6
代碼
job
/**
 * Driver that runs a job reading records through {@link JsonInputFormat} and
 * writing every record's text as an output key with a {@link NullWritable} value.
 *
 * <p>Usage: {@code JSONJob [inputPath [outputPath]]}. Paths that are not given on
 * the command line fall back to the built-in defaults.
 */
public class JSONJob {

    /** Input path used when no first command-line argument is supplied. */
    private static final String DEFAULT_INPUT_PATH = "d:/work/in/2";

    /** Output path used when no second command-line argument is supplied. */
    private static final String DEFAULT_OUTPUT_PATH = "d:/work/out/2";

    /**
     * Mapper that forwards the record text unchanged as the output key and
     * discards the incoming key.
     */
    static class JSONMapper extends Mapper<IntWritable, Text, Text, NullWritable> {
        @Override
        protected void map(IntWritable key, Text value, Context context) throws IOException, InterruptedException {
            context.write(value, NullWritable.get());
        }
    }

    /**
     * Configures and submits the job, then exits with status 0 on success and 1 on failure.
     *
     * @param args optional {@code [inputPath, outputPath]}; missing entries use the defaults
     * @throws IOException            if the job cannot be set up or submitted
     * @throws ClassNotFoundException propagated from {@code Job.waitForCompletion}
     * @throws InterruptedException   if waiting for the job is interrupted
     */
    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        // Allow the paths to be overridden without recompiling; keep the old values as defaults.
        String inputPath = args.length > 0 ? args[0] : DEFAULT_INPUT_PATH;
        String outputPath = args.length > 1 ? args[1] : DEFAULT_OUTPUT_PATH;

        Configuration con = new Configuration();
        Job job = Job.getInstance(con);
        job.setJarByClass(JSONJob.class);

        job.setInputFormatClass(JsonInputFormat.class);
        job.setMapperClass(JSONMapper.class);

        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(NullWritable.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(NullWritable.class);

        FileInputFormat.setInputPaths(job, new Path(inputPath));
        FileOutputFormat.setOutputPath(job, new Path(outputPath));

        boolean result = job.waitForCompletion(true);
        System.exit(result ? 0 : 1);
    }
}
InputFormat
/**
 * File input format that never splits a file and groups its lines into records of
 * {@link #LINES_PER_RECORD} consecutive lines each.
 *
 * <p>The lines of a record are concatenated without any separator, so a JSON object
 * pretty-printed over five lines becomes one single-line value.
 */
public class JsonInputFormat extends FileInputFormat<IntWritable, Text> {

    /** Number of consecutive input lines concatenated into one record. */
    private static final int LINES_PER_RECORD = 5;

    /**
     * Always returns {@code false}: records are counted from the first line of the
     * file, so a file must be read as a whole.
     */
    @Override
    protected boolean isSplitable(JobContext context, Path filename) {
        return false;
    }

    /** Creates the reader that groups the lines of {@code split} into records. */
    @Override
    public RecordReader<IntWritable, Text> createRecordReader(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException {
        return new FiveLineRecordReader(split, context);
    }

    /**
     * Record reader that emits {@link #LINES_PER_RECORD} lines at a time.
     *
     * <p>The key is the running line count after the record (5, 10, 15, ...); the value
     * is the concatenation of the record's lines. A shorter trailing group of lines is
     * still emitted as a final record.
     */
    class FiveLineRecordReader extends RecordReader<IntWritable, Text> {
        FileSplit fileSplit;
        Configuration conf;
        /** {@code true} while more lines may remain; {@code false} once end of file was reached. */
        boolean progress;
        IntWritable lineNum;
        Text value;
        /** Opened lazily on the first read and kept open until {@link #close()}. */
        private BufferedReader reader;

        /**
         * Creates a reader that is already initialized for {@code fileSplit}.
         *
         * @param fileSplit the split to read; must be a {@link FileSplit}
         * @param context   supplies the job configuration
         */
        public FiveLineRecordReader(InputSplit fileSplit, TaskAttemptContext context) throws IOException, InterruptedException {
            initialize(fileSplit, context);
        }

        /**
         * Resets the reader to the start of {@code split}.
         *
         * <p>The constructor already calls this once, so it is written to be safely
         * repeatable: any stream opened by an earlier use is closed first.
         */
        @Override
        public void initialize(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException {
            closeReader();
            this.fileSplit = (FileSplit) split;
            conf = context.getConfiguration();
            lineNum = new IntWritable(0);
            progress = true;
            value = new Text();
        }

        /**
         * Reads the next group of up to {@link #LINES_PER_RECORD} lines.
         *
         * @return {@code true} if a record was read; {@code false} at end of input
         *         (including for an empty file, which yields no records)
         */
        @Override
        public boolean nextKeyValue() throws IOException, InterruptedException {
            if (!progress) {
                return false;
            }
            if (reader == null) {
                reader = openReader();
            }

            StringBuilder json = new StringBuilder();
            int linesRead = 0;
            while (linesRead < LINES_PER_RECORD) {
                String line = reader.readLine();
                if (line == null) {
                    // End of file: remember it so later calls return immediately.
                    progress = false;
                    break;
                }
                json.append(line);
                linesRead++;
            }

            if (linesRead == 0) {
                // Nothing left to read; do not emit an empty record.
                return false;
            }

            value.set(json.toString());
            // The key advances by a full record even when the last group is short.
            lineNum.set(lineNum.get() + LINES_PER_RECORD);
            return true;
        }

        @Override
        public IntWritable getCurrentKey() throws IOException, InterruptedException {
            return lineNum;
        }

        @Override
        public Text getCurrentValue() throws IOException, InterruptedException {
            return value;
        }

        /** Returns 0 while lines may remain and 1 once end of file has been reached. */
        @Override
        public float getProgress() throws IOException, InterruptedException {
            return progress ? 0 : 1;
        }

        /** Releases the underlying stream, if one was opened. */
        @Override
        public void close() throws IOException {
            closeReader();
        }

        /**
         * Opens the split's file through the file system that owns its path, decoding
         * it as UTF-8 instead of the platform default charset.
         */
        private BufferedReader openReader() throws IOException {
            Path file = fileSplit.getPath();
            return new BufferedReader(new InputStreamReader(
                    file.getFileSystem(conf).open(file), java.nio.charset.StandardCharsets.UTF_8));
        }

        /** Closes the current stream if there is one and forgets it. */
        private void closeReader() throws IOException {
            if (reader != null) {
                try {
                    reader.close();
                } finally {
                    reader = null;
                }
            }
        }
    }
}