我的输出出错了.输入文件是:
1 2 3 4
5 4 3 2
期望的输出应该是:key 为 "sum",总和值为 24
但 MapReduce 实际产生的输出是:key 为 "sum",总和值为 34
我在 Ubuntu 14.04 中使用 OpenJDK 7 运行 jar 文件;该 jar 文件是在 Eclipse Juno 中创建的,编译时使用的 Java 版本是 Oracle JDK 7。NumberDriver.java
包装数量;
import java.io.*; //import java.util.StringTokenizer; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.Job; //import org.apache.hadoop.mapreduce.Mapper; //import org.apache.hadoop.mapreduce.Reducer; import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; import org.apache.hadoop.util.GenericOptionsParser; public class NumberDriver { public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException { // TODO Auto-generated method stub Configuration conf=new Configuration(); String[] otherArgs=new GenericOptionsParser(conf,args).getRemainingArgs(); if(otherArgs.length!=2) { System.err.println("Error"); System.exit(2); } Job job=new Job(conf, "number sum"); job.setJarByClass(NumberDriver.class); job.setMapperClass(NumberMapper.class); job.setReducerClass(NumberReducer.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(IntWritable.class); FileInputFormat.addInputPath(job, new Path(otherArgs[0])); FileOutputFormat.setOutputPath(job, new Path(otherArgs[1])); System.exit(job.waitForCompletion(true)?0:1); } }
NumberMapper.java
package numbersum; import java.io.*; import java.util.StringTokenizer; //import org.apache.hadoop.conf.Configuration; //import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.Text; //import org.apache.hadoop.mapreduce.Job; import org.apache.hadoop.mapreduce.Mapper; //import org.apache.hadoop.mapreduce.Reducer; //import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; //import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; //import org.apache.hadoop.util.GenericOptionsParser; //import org.hsqldb.Tokenizer; public class NumberMapper extends Mapper{ int sum; public void map(LongWritable key, Text value, Context context)throws IOException, InterruptedException { StringTokenizer itr=new StringTokenizer(value.toString()); while(itr.hasMoreTokens()) { sum+=Integer.parseInt(itr.nextToken()); } context.write(new Text("sum"),new IntWritable(sum)); } }
NumberReducer.java
package numbersum; import java.io.*; //import java.util.StringTokenizer; //import org.apache.hadoop.conf.Configuration; //import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.Text; //import org.apache.hadoop.mapreduce.Job; //import org.apache.hadoop.mapreduce.Mapper; import org.apache.hadoop.mapreduce.Reducer; //import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; //import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; //import org.apache.hadoop.util.GenericOptionsParser; public class NumberReducer extends Reducer{ public void reduce(Text key,Iterable values, Context context)throws IOException, InterruptedException { int sum=0; for(IntWritable value:values) { sum+=value.get(); } context.write(key,new IntWritable(sum)); } }
UltraInstinc.. 6
我最好的猜测:
int sum; // <-- Why a class member? public void map(LongWritable key, Text value, Context context)throws IOException, InterruptedException { int sum = 0; //Why not here? StringTokenizer itr=new StringTokenizer(value.toString());
推理过程:第一次 map 调用:1 + 2 + 3 + 4 = 10;第二次 map 调用:(残留的 10 +) 5 + 4 + 3 + 2 = 24;Reducer 汇总两次输出:10 + 24 = 34。
……也就是说,上一次 map 调用累积的值被保留了下来。
我最好的猜测:
int sum; // <-- Why a class member? public void map(LongWritable key, Text value, Context context)throws IOException, InterruptedException { int sum = 0; //Why not here? StringTokenizer itr=new StringTokenizer(value.toString());
推理过程:第一次 map 调用:1 + 2 + 3 + 4 = 10;第二次 map 调用:(残留的 10 +) 5 + 4 + 3 + 2 = 24;Reducer 汇总两次输出:10 + 24 = 34。
……也就是说,上一次 map 调用累积的值被保留了下来。