I am trying to use two mappers and one reducer. I want to combine multiple keys and, for each key, get its sum as output. I don't know which part is wrong; I would appreciate it if you could spot the mistakes in my code. I get the following error:
java.io.IOException: Type mismatch in value from map: expected org.apache.hadoop.io.Text, recieved org.apache.hadoop.io.IntWritable
at org.apache.hadoop.mapred.MapTask$MapOutputBuffer.collect(MapTask.java:896)
at org.apache.hadoop.mapred.MapTask$NewOutputCollector.write(MapTask.java:602)
at org.apache.hadoop.mapreduce.task.TaskInputOutputContextImpl.write(TaskInputOutputContextImpl.java:85)
at org.apache.hadoop.mapreduce.lib.map.WrappedMapper$Context.write(WrappedMapper.java:106)
at CounterTwoMapper.map(CounterTwoMapper.java:28)
at CounterTwoMapper.map(CounterTwoMapper.java:8)
at org.apache.hadoop.mapreduce.Mapper.run(Mapper.java:140)
at org.apache.hadoop.mapreduce.lib.input.DelegatingMapper.run(DelegatingMapper.java:51)
at org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:673)
at org.apache.hadoop.mapred.MapTask.run(MapTask.java:331)
at org.apache.hadoop.mapred.Child$4.run(Child.java:268)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(S
import java.io.IOException;
import java.text.SimpleDateFormat;
import java.util.Date;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class CounterMapper extends Mapper<LongWritable, Text, Text, IntWritable> {

    private Text outkey = new Text();

    @Override
    public void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        try {
            // Skip the CSV header row.
            if (value.toString().startsWith("BibNumber")) {
                return;
            }
            String[] data = value.toString().split(",");
            String bookType = data[2];
            String checkoutDateTime = data[5];

            // Parse the timestamp, then re-format it as dd-MM-yyyy.
            SimpleDateFormat frmt = new SimpleDateFormat("MM/dd/yyyy hh:mm:ss a");
            Date creationDate = frmt.parse(checkoutDateTime);
            frmt.applyPattern("dd-MM-yyyy");
            String dateTime = frmt.format(creationDate);

            // Emit "<book type> <date>" -> 1.
            outkey.set(bookType + " " + dateTime);
            context.write(outkey, new IntWritable(1));
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
import java.io.IOException;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class CounterTwoMapper extends Mapper<LongWritable, Text, Text, IntWritable> {

    private Text outkey = new Text();

    @Override
    public void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        // Skip the CSV header row.
        if (value.toString().startsWith("BibNumber")) {
            return;
        }
        String[] data = value.toString().split(",");
        String bibNum = data[0];
        String title = data[1];

        // Emit "<bib number> <title>" -> 1.
        outkey.set(bibNum + " " + title);
        context.write(outkey, new IntWritable(1));
    }
}
import java.io.IOException;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

public class CounterReducer extends Reducer<Text, IntWritable, Text, IntWritable> {

    @Override
    public void reduce(Text key, Iterable<IntWritable> values, Context context)
            throws IOException, InterruptedException {
        // Each mapper emits a 1 per record, so counting the values
        // gives the per-key total.
        int count = 0;
        for (IntWritable value : values) {
            count++;
        }
        context.write(key, new IntWritable(count));
    }
}
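Note that because both mappers always emit a value of 1, counting the values is equivalent to summing them. If a combiner is ever added (which would pre-aggregate partial counts on the map side), only summing remains correct, so the safer loop body is a sum over value.get(). A minimal sketch:

        // Sum the actual values instead of counting them; equivalent here,
        // since every mapped value is 1, but also correct if a combiner
        // pre-aggregates partial counts upstream.
        int sum = 0;
        for (IntWritable value : values) {
            sum += value.get();
        }
        context.write(key, new IntWritable(sum));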
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.MultipleInputs;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

public class MultiInputFile extends Configured implements Tool {

    public int run(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = new Job(conf, "aggprog");
        job.setJarByClass(MultiInputFile.class);

        // Each input path gets its own mapper.
        MultipleInputs.addInputPath(job, new Path(args[0]), TextInputFormat.class, CounterMapper.class);
        MultipleInputs.addInputPath(job, new Path(args[1]), TextInputFormat.class, CounterTwoMapper.class);
        FileOutputFormat.setOutputPath(job, new Path(args[2]));

        job.setReducerClass(CounterReducer.class);
        job.setNumReduceTasks(1);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        return job.waitForCompletion(true) ? 0 : 1;
    }

    public static void main(String[] args) throws Exception {
        int ecode = ToolRunner.run(new MultiInputFile(), args);
        System.exit(ecode);
    }
}
Best Answer
You set the job's OutputValueClass to Text, while the value your mappers actually write is an IntWritable. Set it to IntWritable instead in the MultiInputFile class:

job.setOutputValueClass(IntWritable.class);
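For context: when setMapOutputKeyClass/setMapOutputValueClass are not called, Hadoop assumes the intermediate map output types match the job's final output types, which is why the map-side shuffle buffer rejects the IntWritable the mappers emit. A minimal sketch of the corrected configuration in run() (the explicit map-output calls are optional here, but they make the intent clear):

        // Final (reducer) output types.
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class); // was Text.class

        // Optional but explicit: the intermediate map output types.
        // Without these, Hadoop falls back to the output classes above.
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);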
Regarding java - Hadoop, mapreduce java.io.IOException: Type mismatch in value from map: expected org.apache.hadoop.io.Text, received org.apache.hadoop.io.IntWritable, a similar question can be found on Stack Overflow: https://stackoverflow.com/questions/49738570/