I am getting the error below while running this MapReduce sorting program. I can't find where I am going wrong, so could you please suggest how to solve this problem?
15/02/19 08:59:10 INFO jvm.JvmMetrics: Initializing JVM Metrics with processName=JobTracker, sessionId=
15/02/19 08:59:10 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
15/02/19 08:59:10 WARN mapred.JobClient: Use GenericOptionsParser for parsing the arguments. Applications should implement Tool for the same.
15/02/19 08:59:11 WARN mapred.JobClient: No job jar file set. User classes may not be found. See JobConf(Class) or JobConf#setJar(String).
15/02/19 08:59:11 INFO input.FileInputFormat: Total input paths to process : 1
15/02/19 08:59:16 INFO mapred.JobClient: Running job: job_local_0001
15/02/19 08:59:17 INFO mapred.JobClient: map 0% reduce 0%
15/02/19 08:59:18 INFO mapred.MapTask: io.sort.mb = 100
15/02/19 08:59:28 INFO mapred.MapTask: data buffer = 79691776/99614720
15/02/19 08:59:28 INFO mapred.MapTask: record buffer = 262144/327680
15/02/19 08:59:28 INFO mapred.JobClient: Job complete: job_local_0001
15/02/19 08:59:28 INFO mapred.JobClient: Counters: 0
15/02/19 08:59:28 WARN mapred.LocalJobRunner: job_local_0001
java.lang.ArrayIndexOutOfBoundsException: 1
at demo.Sorting$SortingMapper.map(Sorting.java:97)
at demo.Sorting$SortingMapper.map(Sorting.java:1)
at org.apache.hadoop.mapreduce.Mapper.run(Mapper.java:144)
at org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:646)
at org.apache.hadoop.mapred.MapTask.run(MapTask.java:322)
at org.apache.hadoop.mapred.LocalJobRunner$Job.run(LocalJobRunner.java:210)
Title: I am a beginner in MapReduce programming. I can't find out why this error occurs. I never use an array in this program, yet I get an Array exception, so would someone please help me remove this exception?
package demo;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.WritableComparator;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Partitioner;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.KeyValueTextInputFormat;
import org.apache.hadoop.mapreduce.lib.jobcontrol.ControlledJob;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

public class Sorting{
public static class Pair implements WritableComparable<Pair>{
private Text t;
private IntWritable i;
public void set(Text t, IntWritable i){
this.t = t;
this.i = i;
}
public Text getFirst() { return t; }
public IntWritable getSecond() { return i; }
public Pair()
{
set(new Text(), new IntWritable());
}
public Pair(Text t, IntWritable i)
{
set(t, i);
}
public int compareTo(Pair p)
{
int cmp = t.compareTo(p.t);
if(cmp != 0)
{
return cmp;
}
return i.compareTo(p.i);
}
public void write(DataOutput out) throws IOException
{
t.write(out);
i.write(out);
}
public void readFields(DataInput in) throws IOException
{
t.readFields(in);
i.readFields(in);
}
}
//public class RecordReader<IntWritable, Text>createRecordReader(InputSplit split, TaskAttemptContext contxt)
public static class SortingMapper extends Mapper<Text, Text, Pair, NullWritable> {
String[] output1 = null;
//private Text word = new Text();
@Override
public void map(Text key, Text value, Context context) throws IOException, InterruptedException
{
output1 = value.toString().split(",");
Text word = new Text(output1[0]);
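// likely Sorting.java:97 from the stack trace: output1[1] throws ArrayIndexOutOfBoundsException when the line has no comma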
IntWritable freq = new IntWritable(Integer.parseInt(output1[1]));
context.write(new Pair(word, freq), NullWritable.get());
}
//.write() is the method inherited from interface org.apache.hadoop.mapreduce.TaskInputOutputContext
}
public static class FirstPartitioner extends Partitioner<Pair, NullWritable>{
@Override
public int getPartition(Pair p, NullWritable n, int numPartitions)
{
System.out.println("Partitioner");
String word = p.getFirst().toString();
char first = word.charAt(0);
char middle = 'n';
if(middle < first)
{
return 0;
}
else
return 1 % numPartitions; // why is the % needed???
}
}
public static class KeyComparator extends WritableComparator{
protected KeyComparator()
{
super(Pair.class, true);
}
@Override
public int compare(WritableComparable w1, WritableComparable w2)
{
System.out.println("keyComparator");
Pair v1 = (Pair) w1;
Pair v2 = (Pair) w2;
/*
* since we already count word in the first MR we only need to sort the list by frequency
* so no need to compare Text again
int cmp = Pair.compare(v1.getFirst(), v2.getFirst());
if(cmp != 0) { return cmp; }
*/
return -1 * v1.compareTo(v2);
//possible error: it compares Text first and then compares IntWritable
}
}
public static class GroupComparator extends WritableComparator{
protected GroupComparator()
{
super(Pair.class, true);
}
@Override
public int compare(WritableComparable w1, WritableComparable w2)
{
System.out.println("group Comparator");
Pair v1 = (Pair) w1;
Pair v2 = (Pair) w2;
return v1.getFirst().compareTo(v2.getFirst());
//this compareTo is defined on BinaryComparable
}
}
public static class SortingReducer extends Reducer<Pair, NullWritable, Pair, NullWritable>{
@Override
public void reduce(Pair p, Iterable<NullWritable> values, Context context) throws IOException, InterruptedException
{
System.out.println("sortingReducer");
context.write(p, NullWritable.get());
}
}
public static void main(String[] args) throws Exception {
Path inputDir = new Path("/home/cloudera/Desktop/inputfile");
Path outputDir = new Path("/home/cloudera/Desktop/outptusort");
Configuration conf2 = new Configuration();
//String[] otherArgs2 = new GenericOptionsParser(conf1, args).getRemainingArgs();
ControlledJob cJob2 = new ControlledJob(conf2);
conf2.set("mapreduce.input.keyvaluelinerecordreader.key.value.separator", " ");
cJob2.setJobName("Sorting");
Job job2 = cJob2.getJob();
job2.setJarByClass(Sorting.class);
job2.setInputFormatClass(KeyValueTextInputFormat.class);
job2.setMapperClass(SortingMapper.class);
job2.setPartitionerClass(FirstPartitioner.class);
job2.setSortComparatorClass(KeyComparator.class);
job2.setGroupingComparatorClass(GroupComparator.class);
job2.setReducerClass(SortingReducer.class);
job2.setOutputKeyClass(Pair.class);
job2.setOutputValueClass(NullWritable.class);
job2.setOutputFormatClass(TextOutputFormat.class);
FileInputFormat.addInputPath(job2, inputDir);
FileOutputFormat.setOutputPath(job2, outputDir);
//Delete the output directory if it exists
FileSystem fs = FileSystem.get(conf2);
if(fs.exists(outputDir)){
fs.delete(outputDir,true);
}
job2.waitForCompletion(true);
}
}
Best answer

IntWritable freq = new IntWritable(Integer.parseInt(output1[1]));

requires output1 to contain at least two elements; the exception means it only has one.

That is caused by the value of value.toString(): either it contains no comma at all, or every comma in the string is trailing. The latter case arises because split without a negative limit argument strips empty trailing tokens.
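The trailing-token behavior is easy to verify in isolation (a standalone snippet, independent of the job):

public class SplitDemo
{
    public static void main(String[] args)
    {
        // trailing empty tokens are dropped by default
        System.out.println("word,".split(",").length);     // prints 1
        // a negative limit keeps them
        System.out.println("word,".split(",", -1).length); // prints 2
        // a well-formed line splits into two tokens
        System.out.println("word,5".split(",").length);    // prints 2
    }
}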
You should check the array's length before accessing its elements.
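For example, the map method could guard against malformed lines instead of throwing. A minimal sketch of that check (the counter group and name here are made up for illustration):

@Override
public void map(Text key, Text value, Context context) throws IOException, InterruptedException
{
    String[] output1 = value.toString().split(",");
    if (output1.length < 2)
    {
        // record and skip lines that do not split into a "word,frequency" pair
        context.getCounter("Sorting", "MalformedLines").increment(1); // hypothetical counter name
        return;
    }
    Text word = new Text(output1[0]);
    IntWritable freq = new IntWritable(Integer.parseInt(output1[1]));
    context.write(new Pair(word, freq), NullWritable.get());
}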