我编写了一个简单的字数统计程序,并尝试使用较新 API 格式的 MultipleOutputs 获取输出。
我期望在输出文件中得到数据(每个文件以对应的键名命名),
但得到的文件都是空的:
import java.io.IOException;
import java.util.StringTokenizer;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Mapper.Context;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.MultipleOutputs;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
public class word {
public static class wordmapper extends
Mapper<LongWritable,Text,Text,IntWritable> {
Text x = new Text();
IntWritable z =new IntWritable(1);
String str1=null;
public void map(LongWritable key, Text value,Context con)
throws IOException, InterruptedException {
StringTokenizer str = new StringTokenizer(value.toString());
while(str.hasMoreTokens()) {
str1=str.nextToken();
x.set(str1);
con.write(x, z);
}
}
}
public static class wordreducer extends Reducer<Text,
IntWritable,Text, IntWritable> {
String generateFilename(Text k) { return k.toString(); }
private MultipleOutputs mos;
public void setup(Context context) {
mos =new MultipleOutputs(context);
}
String m=null;
IntWritable num=new IntWritable();
public void reduce(Text key,Iterable<IntWritable> value,Context con)
throws IOException, InterruptedException {
int sum=0;
for(IntWritable k :value) {
sum=sum+k.get();
}
num.set(sum);
m=generateFilename(key);
//mos.write(key, num, "text");
//mos.write("text", key, num);
mos.write("text", key, value, generateFilename(key));
//con.write(key,num);
}
}
public static void main(String args[])
throws IOException, InterruptedException, ClassNotFoundException {
Configuration con = new Configuration();
Job j= new Job(con,"word");
j.setJarByClass(word.class);
j.setMapperClass(wordmapper.class);
j.setReducerClass(wordreducer.class);
//j.setNumReduceTasks(0);
j.setOutputKeyClass(Text.class);
j.setOutputValueClass(IntWritable.class);
MultipleOutputs.addNamedOutput(j,"text",TextOutputFormat.class,Text.class,IntWritable.class);
//MultipleOutputs.addNamedOutput(j, namedOutput, outputFormatClass, keyClass, valueClass);
FileInputFormat.addInputPath(j, new Path(args[0]));
FileOutputFormat.setOutputPath(j, new Path(args[1]));
System.exit(j.waitForCompletion(true)?0:1);
}
}
作业确实生成了输出文件,但这些文件全部是空的(0 字节),打开后没有任何数据:
-rw-r--r-- 1 root supergroup 0 2014-09-15 03:00 /user/root/listouput/world9/_SUCCESS
drwxr-xr-x - root supergroup 0 2014-09-15 03:00 /user/root/listouput/world9/_logs
-rw-r--r-- 1 root supergroup 0 2014-09-15 03:00 /user/root/listouput/world9/best-r-00000
-rw-r--r-- 1 root supergroup 0 2014-09-15 03:00 /user/root/listouput/world9/good-r-00000
-rw-r--r-- 1 root supergroup 0 2014-09-15 03:00 /user/root/listouput/world9/hadoop-r-00000
-rw-r--r-- 1 root supergroup 0 2014-09-15 03:00 /user/root/listouput/world9/hello-r-00000
-rw-r--r-- 1 root supergroup 0 2014-09-15 03:00 /user/root/listouput/world9/is-r-00000
-rw-r--r-- 1 root supergroup 0 2014-09-15 03:00 /user/root/listouput/world9/part-r-00000
-rw-r--r-- 1 root supergroup 0 2014-09-15 03:00 /user/root/listouput/world9/rule-r-00000
-rw-r--r-- 1 root supergroup 0 2014-09-15 03:00 /user/root/listouput/world9/the-r-00000
-rw-r--r-- 1 root supergroup 0 2014-09-15 03:00 /user/root/listouput/world9/we-r-00000
-rw-r--r-- 1 root supergroup 0 2014-09-15 03:00 /user/root/listouput/world9/world-r-00000
root@ubuntu:/home/mrinmoy/Desktop/PracticeJar files# hadoop fs -cat
/user/root/listouput/world9/best-r-00000
最佳答案
输出文件为空,是因为您忘记关闭 mos。需要在 Reducer 中覆盖(override)cleanup 方法:
@Override
protected void cleanup(Context context) throws IOException, InterruptedException {
// MultipleOutputs buffers its record writers; close() flushes them,
// otherwise every named-output file remains empty.
mos.close();
}
关于hadoop - Hadoop API中的MultipleOutputs,我们在Stack Overflow上找到一个类似的问题:https://stackoverflow.com/questions/25846240/