任务:分析通话记录,查处每个手机号码有哪些打过来的号码
13510921776 10086
13710148751 10086
13914248991 10086
13510921776 13710148751
13510921776 13710148751
13914248991 13710148751
13710148751 13510921776
要求输出结果:
10086 13510921776|13710148751|13914248991|
13510921776 13710148751|
13710148751 13510921776|13510921776|13914248991|
代码:
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import java.io.IOException; public class PhoneAnalyzer extends Configured implements Tool { enum Counter {
LINESKIP; // 出错的行
} @Override
public int run(String[] args) throws Exception {
Configuration conf = getConf();
Job job = new Job(conf, "phoneAnalyzer"); // 任务名
job.setJarByClass(PhoneAnalyzer.class); // 指定Class
FileInputFormat.addInputPath(job, new Path("hdfs://localhost:9000/user/root/in")); // 输入路径
FileOutputFormat.setOutputPath(job, new Path("hdfs://localhost:9000/user/root/out")); // 输出路径
job.setMapperClass(Map.class); // 调用Map类作为Mapper任务代码
job.setReducerClass(Reduce.class); // 调用Reduce类作为Reducer任务代码
job.setOutputFormatClass(TextOutputFormat.class);
job.setOutputKeyClass(Text.class); // 指定输出的Key的格式(KEYOUT)
job.setOutputValueClass(Text.class); // 指定输出的Value的格式(VALUEOUT)
job.waitForCompletion(true);
return job.isSuccessful() ? 0 : 1;
} public static class Map extends
Mapper<LongWritable, Text, Text, Text> { //<KEYIN, VALUEIN, KEYOUT, VALUEOUT>
@Override
protected void map(LongWritable key, Text value, Context context)
throws IOException, InterruptedException {
try {
// key - 行号 value - 一行的文本
String line = value.toString(); //13510000000 10086(13510000000拨打10086)
// 数据处理
String[] lineSplit = line.split(" ");
String phone1 = lineSplit[0];
String phone2 = lineSplit[1];
context.write(new Text(phone2), new Text(phone1)); // 输出 key \t value
} catch (Exception e) {
context.getCounter(Counter.LINESKIP).increment(1); // 出错令计数器+1
}
} } public static class Reduce extends Reducer<Text, Text, Text, Text> { //<KEYIN(必须与Mapper的KEYOUT相同),VALUEIN(必须与Mapper的VALUEOUT相同),KEYOUT,VALUEOUT> @Override
protected void reduce(Text key, Iterable<Text> values,
Context context)
throws IOException, InterruptedException {
String valueStr;
String out = "";
for(Text value:values){
valueStr = value.toString() + "|";
out += valueStr;
}
// 输出 key \t value(如果我们的输出结果不是key \t value格式,那么我们的key可定义为NullWritable,而value使用key与value的组合。)
context.write(key, new Text(out));
}
} public static void main(String[] args) throws Exception {
//运行任务
int res = ToolRunner.run(new Configuration(), new PhoneAnalyzer(), args);
System.exit(res);
}
}