Hadoop WordCount按单词出现次序排序

本文介绍了Hadoop WordCount按单词出现次序排序的处理方法，对大家解决问题具有一定的参考价值，需要的朋友们下面随着小编来一起学习吧！

问题描述

我需要运行 WordCount，它会给出所有单词及其出现次数，但结果要按出现次数排序，而不是按字母顺序排序。我知道为此需要创建两个作业，并让它们一个接一个地运行。
我使用了《Sorted word count using Hadoop MapReduce》一文中的 mapper 和 reducer：

  package org.myorg;

  import java.io.IOException;
  import java.util.*;

  import org.apache.hadoop.fs.Path;
  import org.apache.hadoop.io.*;
  import org.apache.hadoop.mapred.*;
  import org.apache.hadoop.mapreduce.Job;

  /**
   * Word count followed by a second job meant to order the words by count.
   *
   * <p>This is the listing as posted in the question, so its runtime logic is
   * intentionally left untouched, including the second wait on {@code job1}
   * which the answer replaces with a wait on {@code job2}.
   */
  public class WordCount {

      /** First-job mapper: emits {@code (token, 1)} for every token of a line. */
      public static class Map extends MapReduceBase implements Mapper<LongWritable, Text, Text, IntWritable> {
          private final static IntWritable one = new IntWritable(1);
          private Text word = new Text();

          public void map(LongWritable key, Text value, OutputCollector<Text, IntWritable> output, Reporter reporter) throws IOException {
              String line = value.toString();
              StringTokenizer tokenizer = new StringTokenizer(line);
              while (tokenizer.hasMoreTokens()) {
                  word.set(tokenizer.nextToken());
                  output.collect(word, one);
              }
          }
      }

      /** First-job reducer (also set as combiner): emits {@code (key, sum of values)}. */
      public static class Reduce extends MapReduceBase implements Reducer<Text, IntWritable, Text, IntWritable> {
          public void reduce(Text key, Iterator<IntWritable> values, OutputCollector<Text, IntWritable> output, Reporter reporter) throws IOException {
              int sum = 0;
              while (values.hasNext()) {
                  sum += values.next().get();
              }
              output.collect(key, new IntWritable(sum));
          }
      }

      /**
       * Second-job mapper: reads one line, takes the first token as the word and
       * the second token as its count, and emits {@code (count, word)}.
       *
       * <p>NOTE(review): declared without {@code static}, unlike {@code Map} and
       * {@code Reduce} - confirm the framework can instantiate it.
       */
      class Map1 extends MapReduceBase implements Mapper<Object, Text, IntWritable, Text> {
          public void map(Object key, Text value, OutputCollector<IntWritable, Text> collector, Reporter arg3) throws IOException {
              String line = value.toString();
              StringTokenizer stringTokenizer = new StringTokenizer(line);
              {
                  // Fallbacks used when the line has no first or no second token.
                  int number = 999;
                  String word = "empty";

                  if (stringTokenizer.hasMoreTokens()) {
                      String str0 = stringTokenizer.nextToken();
                      word = str0.trim();
                  }

                  if (stringTokenizer.hasMoreElements()) {
                      String str1 = stringTokenizer.nextToken();
                      number = Integer.parseInt(str1.trim());
                  }
                  collector.collect(new IntWritable(number), new Text(word));
              }
          }
      }

      /** Second-job reducer (also set as combiner): re-emits every value under its key. */
      class Reduce1 extends MapReduceBase implements Reducer<IntWritable, Text, IntWritable, Text> {
          public void reduce(IntWritable key, Iterator<Text> values, OutputCollector<IntWritable, Text> arg2, Reporter arg3) throws IOException {
              while ((values.hasNext())) {
                  arg2.collect(key, values.next());
              }
          }
      }

      /**
       * Configures both jobs and runs them in sequence.
       *
       * @param args {@code args[0]} is the input path of the first job,
       *             {@code args[1]} the output path of the second job
       * @throws Exception propagated from job submission or completion
       */
      public static void main(String[] args) throws Exception {
          // Job 1: count the words and write the result to /tmp/temp.
          JobConf conf = new JobConf(WordCount.class);
          conf.setJobName("wordCount");

          conf.setOutputKeyClass(Text.class);
          conf.setOutputValueClass(IntWritable.class);

          conf.setMapperClass(Map.class);
          conf.setCombinerClass(Reduce.class);
          conf.setReducerClass(Reduce.class);

          conf.setInputFormat(TextInputFormat.class);
          conf.setOutputFormat(TextOutputFormat.class);

          FileInputFormat.setInputPaths(conf, new Path(args[0]));
          FileOutputFormat.setOutputPath(conf, new Path("/tmp/temp"));

          //JobClient.runJob(conf);
          //------------------------------------------------------------------
          // Job 2: read job 1's part-00000 file and emit (count, word) pairs.
          JobConf conf2 = new JobConf(WordCount.class);
          conf2.setJobName("WordCount1");

          // NOTE(review): declares Text/IntWritable output although Map1 and
          // Reduce1 emit (IntWritable, Text) - confirm.
          conf2.setOutputKeyClass(Text.class);
          conf2.setOutputValueClass(IntWritable.class);

          conf2.setMapperClass(Map1.class);
          conf2.setCombinerClass(Reduce1.class);
          conf2.setReducerClass(Reduce1.class);

          conf2.setInputFormat(TextInputFormat.class);
          conf2.setOutputFormat(TextOutputFormat.class);

          FileInputFormat.setInputPaths(conf2, new Path("/tmp/temp/part-00000"));
          FileOutputFormat.setOutputPath(conf2, new Path(args[1]));

          Job job1 = new Job(conf);
          Job job2 = new Job(conf2);

          job1.submit();
          if (job1.waitForCompletion(true)) {
              job2.submit();
              // As posted: waits on job1 again, not on job2. This is the line
              // the answer changes to job2.waitForCompletion(true).
              job1.waitForCompletion(true);
          }
      }
  }

它无法正常运行，我应该修改哪里？或者说它为什么无法运行？

解决方案

如果程序一直运行到输出：

  INFO input.FileInputFormat: Total input paths to process : 1

然后问题在于你的最后一行：

job2.submit();

作业已提交但未排入队列进行处理。试试这个：

  // Corrected sequence: after submitting job2, wait on job2 (the question's
  // listing waited on job1 a second time instead).
  job1.submit();
  if (job1.waitForCompletion(true)) {
      job2.submit();
      job2.waitForCompletion(true);
  }

这样才能执行用于排序的 MR 作业。我已经用 MR 的新 API 试过你的代码，整个流程可以正常运行。

只需添加最后一行即可。

I need to run WordCount which will give me all the words and their occurrences but sorted by the occurrences and not by the alphabet
I understand that I need to create two jobs for this and run one after the other. I used the mapper and the reducer from "Sorted word count using Hadoop MapReduce":
package org.myorg;

import java.io.IOException;
import java.util.*;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.*;
import org.apache.hadoop.mapred.*;
import org.apache.hadoop.mapreduce.Job;

/**
 * Word count followed by a second job meant to order the words by count.
 *
 * <p>This is the listing as posted in the question, so its runtime logic is
 * intentionally left untouched, including the second wait on {@code job1}
 * which the answer replaces with a wait on {@code job2}.
 */
public class WordCount {

    /** First-job mapper: emits {@code (token, 1)} for every token of a line. */
    public static class Map extends MapReduceBase implements Mapper<LongWritable, Text, Text, IntWritable> {
        private final static IntWritable one = new IntWritable(1);
        private Text word = new Text();

        public void map(LongWritable key, Text value, OutputCollector<Text, IntWritable> output, Reporter reporter) throws IOException {
            String line = value.toString();
            StringTokenizer tokenizer = new StringTokenizer(line);
            while (tokenizer.hasMoreTokens()) {
                word.set(tokenizer.nextToken());
                output.collect(word, one);
            }
        }
    }

    /** First-job reducer (also set as combiner): emits {@code (key, sum of values)}. */
    public static class Reduce extends MapReduceBase implements Reducer<Text, IntWritable, Text, IntWritable> {
        public void reduce(Text key, Iterator<IntWritable> values, OutputCollector<Text, IntWritable> output, Reporter reporter) throws IOException {
            int sum = 0;
            while (values.hasNext()) {
                sum += values.next().get();
            }
            output.collect(key, new IntWritable(sum));
        }
    }

    /**
     * Second-job mapper: reads one line, takes the first token as the word and
     * the second token as its count, and emits {@code (count, word)}.
     *
     * <p>NOTE(review): declared without {@code static}, unlike {@code Map} and
     * {@code Reduce} - confirm the framework can instantiate it.
     */
    class Map1 extends MapReduceBase implements Mapper<Object, Text, IntWritable, Text> {
        public void map(Object key, Text value, OutputCollector<IntWritable, Text> collector, Reporter arg3) throws IOException {
            String line = value.toString();
            StringTokenizer stringTokenizer = new StringTokenizer(line);
            {
                // Fallbacks used when the line has no first or no second token.
                int number = 999;
                String word = "empty";

                if (stringTokenizer.hasMoreTokens()) {
                    String str0 = stringTokenizer.nextToken();
                    word = str0.trim();
                }

                if (stringTokenizer.hasMoreElements()) {
                    String str1 = stringTokenizer.nextToken();
                    number = Integer.parseInt(str1.trim());
                }
                collector.collect(new IntWritable(number), new Text(word));
            }
        }
    }

    /** Second-job reducer (also set as combiner): re-emits every value under its key. */
    class Reduce1 extends MapReduceBase implements Reducer<IntWritable, Text, IntWritable, Text> {
        public void reduce(IntWritable key, Iterator<Text> values, OutputCollector<IntWritable, Text> arg2, Reporter arg3) throws IOException {
            while ((values.hasNext())) {
                arg2.collect(key, values.next());
            }
        }
    }

    /**
     * Configures both jobs and runs them in sequence.
     *
     * @param args {@code args[0]} is the input path of the first job,
     *             {@code args[1]} the output path of the second job
     * @throws Exception propagated from job submission or completion
     */
    public static void main(String[] args) throws Exception {
        // Job 1: count the words and write the result to /tmp/temp.
        JobConf conf = new JobConf(WordCount.class);
        conf.setJobName("wordCount");

        conf.setOutputKeyClass(Text.class);
        conf.setOutputValueClass(IntWritable.class);

        conf.setMapperClass(Map.class);
        conf.setCombinerClass(Reduce.class);
        conf.setReducerClass(Reduce.class);

        conf.setInputFormat(TextInputFormat.class);
        conf.setOutputFormat(TextOutputFormat.class);

        FileInputFormat.setInputPaths(conf, new Path(args[0]));
        FileOutputFormat.setOutputPath(conf, new Path("/tmp/temp"));

        //JobClient.runJob(conf);
        //------------------------------------------------------------------
        // Job 2: read job 1's part-00000 file and emit (count, word) pairs.
        JobConf conf2 = new JobConf(WordCount.class);
        conf2.setJobName("WordCount1");

        // NOTE(review): declares Text/IntWritable output although Map1 and
        // Reduce1 emit (IntWritable, Text) - confirm.
        conf2.setOutputKeyClass(Text.class);
        conf2.setOutputValueClass(IntWritable.class);

        conf2.setMapperClass(Map1.class);
        conf2.setCombinerClass(Reduce1.class);
        conf2.setReducerClass(Reduce1.class);

        conf2.setInputFormat(TextInputFormat.class);
        conf2.setOutputFormat(TextOutputFormat.class);

        FileInputFormat.setInputPaths(conf2, new Path("/tmp/temp/part-00000"));
        FileOutputFormat.setOutputPath(conf2, new Path(args[1]));

        Job job1 = new Job(conf);
        Job job2 = new Job(conf2);

        job1.submit();
        if (job1.waitForCompletion(true)) {
            job2.submit();
            // As posted: waits on job1 again, not on job2. This is the line
            // the answer changes to job2.waitForCompletion(true).
            job1.waitForCompletion(true);
        }
    }
}
It's not working. What should I change here, or why is it not working?
解决方案
If the program runs until:
INFO input.FileInputFormat: Total input paths to process : 1
then the problem lies in your last line:
job2.submit();
the job has been submitted but not queued to be processed. Try this:
// Corrected sequence: after submitting job2, wait on job2 (the question's
// listing waited on job1 a second time instead).
job1.submit(); if (job1.waitForCompletion(true)) { job2.submit(); job2.waitForCompletion(true); }
to process your sorter MR job. I've tried your code with the new API for MR and the flow works.
Just add the last line.

这篇关于Hadoop WordCount按单词出现次序排序的文章就介绍到这了，希望我们推荐的答案对大家有所帮助，也希望大家多多支持！