继上篇

MapReduce清洗数据

package mapreduce;

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

public class CleanData {
    public static class Map extends Mapper<Object , Text , Text , IntWritable>{
        private static Text newKey=new Text();
        private static String chage(String data) {
            char[] str = data.toCharArray();
            String[] time = new String[7];
            int j = 0;
            int k = 0;
            for(int i=0;i<str.length;i++) {
                if(str[i]=='/'||str[i]==':'||str[i]==32) {
                    time[k] = data.substring(j,i);
                    j = i+1;
                    k++;
                }
            }
            time[k] = data.substring(j, data.length());

             switch(time[1]) { case "Jan":time[1]="01";break; case
              "Feb":time[1]="02";break; case "Mar":time[1]="03";break; case
              "Apr":time[1]="04";break; case "May":time[1]="05";break; case
              "Jun":time[1]="06";break; case "Jul":time[1]="07";break; case
              "Aug":time[1]="08";break; case "Sep":time[1]="09";break; case
              "Oct":time[1]="10";break; case "Nov":time[1]="11";break; case
              "Dec":time[1]="12";break; }

            data = time[2]+"-"+time[1]+"-"+time[0]+" "+time[3]+":"+time[4]+":"+time[5];
            return data;
        }
        public void map(Object key,Text value,Context context) throws IOException, InterruptedException{
            String line=value.toString();
            System.out.println(line);
            String arr[]=line.split(",");

            String ip = arr[0];
            String date = arr[1];
            String day = arr[2];
            String traffic = arr[3];
            String type = arr[4];
            String id = arr[5];

            date = chage(date);
            traffic = traffic.substring(0, traffic.length()-1);

            newKey.set(ip+'\t'+date+'\t'+day+'\t'+traffic+'\t'+type);
            //newKey.set(ip+','+date+','+day+','+traffic+','+type); 
            int click=Integer.parseInt(id);
            context.write(newKey, new IntWritable(click));
        }
    }
    public static class Reduce extends Reducer<Text, IntWritable, Text, IntWritable>{
        public void reduce(Text key,Iterable<IntWritable> values,Context context) throws IOException, InterruptedException{
            for(IntWritable val : values){
                context.write(key, val);
            }
        }
    }
    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException{
        Configuration conf=new Configuration();
        System.out.println("start");
        Job job =new Job(conf,"cleanData");
        job.setJarByClass(CleanData.class);
        job.setMapperClass(Map.class);
        job.setReducerClass(Reduce.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);
        Path in=new Path("hdfs://192.168.137.67:9000/mymapreducel/in/result.txt");
        Path out=new Path("hdfs://192.168.137.67:9000/mymapreducelShiYan/out1");
        FileInputFormat.addInputPath(job,in);
        FileOutputFormat.setOutputPath(job,out);
        System.exit(job.waitForCompletion(true) ? 0 : 1);

    }
} 

今天遇到了一个

java.lang.ClassCastException: class org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos$GetFileInfoRequestProto cannot be cast to class com.google.protobuf.Message (org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos$GetFileI....

的错误

搞了好几个小时也没有解决,最后没办法了把导的包全部移除后重新导入,解决了问题。

01-03 04:56
查看更多