I am getting the error below while running this MapReduce sorting program. I can't find where I am going wrong, so could you please suggest how to solve this problem?
15/02/19 08:59:10 INFO jvm.JvmMetrics: Initializing JVM Metrics with processName=JobTracker, sessionId=
15/02/19 08:59:10 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
15/02/19 08:59:10 WARN mapred.JobClient: Use GenericOptionsParser for parsing the arguments. Applications should implement Tool for the same.
15/02/19 08:59:11 WARN mapred.JobClient: No job jar file set. User classes may not be found. See JobConf(Class) or JobConf#setJar(String).
15/02/19 08:59:11 INFO input.FileInputFormat: Total input paths to process : 1
15/02/19 08:59:16 INFO mapred.JobClient: Running job: job_local_0001
15/02/19 08:59:17 INFO mapred.JobClient: map 0% reduce 0%
15/02/19 08:59:18 INFO mapred.MapTask: io.sort.mb = 100
15/02/19 08:59:28 INFO mapred.MapTask: data buffer = 79691776/99614720
15/02/19 08:59:28 INFO mapred.MapTask: record buffer = 262144/327680
15/02/19 08:59:28 INFO mapred.JobClient: Job complete: job_local_0001
15/02/19 08:59:28 INFO mapred.JobClient: Counters: 0
15/02/19 08:59:28 WARN mapred.LocalJobRunner: job_local_0001
java.lang.ArrayIndexOutOfBoundsException: 1
at demo.Sorting$SortingMapper.map(Sorting.java:97)
at demo.Sorting$SortingMapper.map(Sorting.java:1)
at org.apache.hadoop.mapreduce.Mapper.run(Mapper.java:144)
at org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:646)
at org.apache.hadoop.mapred.MapTask.run(MapTask.java:322)
at org.apache.hadoop.mapred.LocalJobRunner$Job.run(LocalJobRunner.java:210)
Title: I am a beginner in MapReduce programming. I can't find out why this error occurs. I never use an array in this program, yet I get an Array exception, so would someone please help me remove this exception?
package demo;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.WritableComparator;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Partitioner;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.KeyValueTextInputFormat;
import org.apache.hadoop.mapreduce.lib.jobcontrol.ControlledJob;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

public class Sorting{
public static class Pair implements WritableComparable<Pair>{
private Text t;
private IntWritable i;
public void set(Text t, IntWritable i){
this.t = t;
this.i = i;
}
public Text getFirst() { return t; }
public IntWritable getSecond() { return i; }
public Pair()
{
set(new Text(), new IntWritable());
}
public Pair(Text t, IntWritable i)
{
set(t, i);
}
public int compareTo(Pair p)
{
int cmp = t.compareTo(p.t);
if(cmp != 0)
{
return cmp;
}
return i.compareTo(p.i);
}
public void write(DataOutput out) throws IOException
{
t.write(out);
i.write(out);
}
public void readFields(DataInput in) throws IOException
{
t.readFields(in);
i.readFields(in);
}
}
//public class RecordReader<IntWritable, Text>createRecordReader(InputSplit split, TaskAttemptContext contxt)
public static class SortingMapper extends Mapper<Text, Text, Pair, NullWritable> {
String[] output1 = null;
//private Text word = new Text();
@Override
public void map(Text key, Text value, Context context) throws IOException, InterruptedException
{
output1 = value.toString().split(",");
Text word = new Text(output1[0]);
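// likely Sorting.java:97 from the stack trace: output1[1] throws ArrayIndexOutOfBoundsException when the line has no comma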
IntWritable freq = new IntWritable(Integer.parseInt(output1[1]));
context.write(new Pair(word, freq), NullWritable.get());
}
//.write() is the method inherited from interface org.apache.hadoop.mapreduce.TaskInputOutputContext
}
public static class FirstPartitioner extends Partitioner<Pair, NullWritable>{
@Override
public int getPartition(Pair p, NullWritable n, int numPartitions)
{
System.out.println("Partitioner");
String word = p.getFirst().toString();
char first = word.charAt(0);
char middle = 'n';
if(middle < first)
{
return 0;
}
else
return 1 % numPartitions; // why is the % needed???
}
}
public static class KeyComparator extends WritableComparator{
protected KeyComparator()
{
super(Pair.class, true);
}
@Override
public int compare(WritableComparable w1, WritableComparable w2)
{
System.out.println("keyComparator");
Pair v1 = (Pair) w1;
Pair v2 = (Pair) w2;
/*
* since we already count word in the first MR we only need to sort the list by frequency
* so no need to compare Text again
int cmp = Pair.compare(v1.getFirst(), v2.getFirst());
if(cmp != 0) { return cmp; }
*/
return -1 * v1.compareTo(v2);
//possible error: it compares Text first and then compares IntWritable
}
}
public static class GroupComparator extends WritableComparator{
protected GroupComparator()
{
super(Pair.class, true);
}
@Override
public int compare(WritableComparable w1, WritableComparable w2)
{
System.out.println("group Comparator");
Pair v1 = (Pair) w1;
Pair v2 = (Pair) w2;
return v1.getFirst().compareTo(v2.getFirst());
//this compareTo is defined on BinaryComparable
}
}
public static class SortingReducer extends Reducer<Pair, NullWritable, Pair, NullWritable>{
@Override
public void reduce(Pair p, Iterable<NullWritable> values, Context context) throws IOException, InterruptedException
{
System.out.println("sortingReducer");
context.write(p, NullWritable.get());
}
}
public static void main(String[] args) throws Exception {
Path inputDir = new Path("/home/cloudera/Desktop/inputfile");
Path outputDir = new Path("/home/cloudera/Desktop/outptusort");
Configuration conf2 = new Configuration();
//String[] otherArgs2 = new GenericOptionsParser(conf1, args).getRemainingArgs();
ControlledJob cJob2 = new ControlledJob(conf2);
conf2.set("mapreduce.input.keyvaluelinerecordreader.key.value.separator", " ");
cJob2.setJobName("Sorting");
Job job2 = cJob2.getJob();
job2.setJarByClass(Sorting.class);
job2.setInputFormatClass(KeyValueTextInputFormat.class);
job2.setMapperClass(SortingMapper.class);
job2.setPartitionerClass(FirstPartitioner.class);
job2.setSortComparatorClass(KeyComparator.class);
job2.setGroupingComparatorClass(GroupComparator.class);
job2.setReducerClass(SortingReducer.class);
job2.setOutputKeyClass(Pair.class);
job2.setOutputValueClass(NullWritable.class);
job2.setOutputFormatClass(TextOutputFormat.class);
FileInputFormat.addInputPath(job2, inputDir);
FileOutputFormat.setOutputPath(job2, outputDir);
//Delete the output directory if it exists
FileSystem fs = FileSystem.get(conf2);
if(fs.exists(outputDir)){
fs.delete(outputDir,true);
}
job2.waitForCompletion(true);
}
}
Best answer

IntWritable freq = new IntWritable(Integer.parseInt(output1[1]));

requires output1 to contain at least two elements; the exception means it only has one.

That is caused by the value of value.toString(): either it contains no comma at all, or every comma in the string is trailing. The latter case arises because split without a negative limit argument strips empty trailing tokens.
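The trailing-token behavior is easy to verify in isolation (a standalone snippet, independent of the job):

public class SplitDemo
{
    public static void main(String[] args)
    {
        // trailing empty tokens are dropped by default
        System.out.println("word,".split(",").length);     // prints 1
        // a negative limit keeps them
        System.out.println("word,".split(",", -1).length); // prints 2
        // a well-formed line splits into two tokens
        System.out.println("word,5".split(",").length);    // prints 2
    }
}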
You should check the array's length before accessing its elements.
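For example, the map method could guard against malformed lines instead of throwing. A minimal sketch of that check (the counter group and name here are made up for illustration):

@Override
public void map(Text key, Text value, Context context) throws IOException, InterruptedException
{
    String[] output1 = value.toString().split(",");
    if (output1.length < 2)
    {
        // record and skip lines that do not split into a "word,frequency" pair
        context.getCounter("Sorting", "MalformedLines").increment(1); // hypothetical counter name
        return;
    }
    Text word = new Text(output1[0]);
    IntWritable freq = new IntWritable(Integer.parseInt(output1[1]));
    context.write(new Pair(word, freq), NullWritable.get());
}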