直到晚上,整个环境跑起来,web监控等也正常。于是用Scala写了一个word count的测试程序。
Scala可以很好地调用Java代码,所以,唯一要做的就是配置好编译环境,并把scala-library.jar和
应用程序一并打包。通过简单的探索,就得到了可以运行的配置。
这里给出简单的编译和打包方法,供参考。
编译脚本:这个脚本的主要作用是设置正确的classpath。
#!/bin/bash
# Fast scala compiler (fsc) wrapper for Hadoop.
# Accepts every option fsc supports, except '-classpath', which this
# script sets itself so Scala sources referencing Hadoop classes compile
# without manual classpath setup.

class_path=".:$JAVA_HOME/lib/tools.jar:$HADOOP_HOME/hadoop-2-core.jar"

# Hadoop's bundled third-party jars. Listed explicitly (rather than a
# lib/*.jar glob) so the compile classpath stays deterministic even if
# extra jars are dropped into $HADOOP_HOME/lib later.
hadoop_libs=(
    commons-cli-2.0-SNAPSHOT.jar
    commons-codec-1.3.jar
    commons-httpclient-3.0.1.jar
    commons-logging-1.0.4.jar
    commons-logging-api-1.0.4.jar
    commons-net-1.4.1.jar
    derbyclient.jar
    derby.jar
    hadoop-2-baidu-sos.jar
    hsqldb-1.8.0.10.jar
    jets3t-0.6.1.jar
    jetty-6.1.14.jar
    jetty-util-6.1.14.jar
    json-org.jar
    junit-3.8.1.jar
    kfs-0.2.2.jar
    log4j-1.2.15.jar
    oro-2.0.8.jar
    servlet-api.jar
    slf4j-api-1.4.3.jar
    slf4j-log4j12-1.4.3.jar
    xmlenc-0.52.jar
    jetty-ext/commons-el.jar
    jetty-ext/jasper-compiler.jar
    jetty-ext/jasper-runtime.jar
    jetty-ext/jsp-api.jar
)

for jar in "${hadoop_libs[@]}"; do
    class_path="${class_path}:$HADOOP_HOME/lib/${jar}"
done

# exec: replace the wrapper shell with fsc; quote the classpath so paths
# containing spaces survive word splitting.
exec fsc -classpath "${class_path}" "$@"
#!/bin/bash
# Package compiled classes into a jar file to submit to Hadoop for Scala jobs.
# Usage: <script> jar_file classes_dir
# The Scala runtime (scala-library.jar) is copied into <classes_dir>/lib/
# so Hadoop's task classloader can pick it up from inside the job jar.

if [ $# -ne 2 ]; then
    # Usage errors go to stderr so they don't pollute piped output.
    echo "Usage: $(basename "$0") jar_file classes_dir" >&2
    exit 1
fi

jarfile=$1
classes_dir=$2

if [ -e "${classes_dir}/lib" ]; then
    echo "adding libraries: "
    ls "${classes_dir}/lib"
else
    # -p: no error if created concurrently; abort if creation fails,
    # otherwise the cp below would fail with a confusing message.
    mkdir -p "${classes_dir}/lib" || exit 1
fi

# Only build the jar if the Scala runtime was copied successfully.
cp "$SCALA_HOME/lib/scala-library.jar" "${classes_dir}/lib/" &&
jar -cvf "${jarfile}.jar" -C "${classes_dir}" .
- package net.liangkun
-
- import java.io.IOException
- import java.util._
-
- import org.apache.hadoop.fs.Path
- import org.apache.hadoop.conf._
- import org.apache.hadoop.io._
- import org.apache.hadoop.mapred._
- import org.apache.hadoop.util._
-
/** Mapper for word count: emits (token, 1) for every whitespace-delimited
 *  token in each input line (old `mapred` API).
 */
class Map extends MapReduceBase
    with Mapper[LongWritable, Text, Text, IntWritable] {
  // Writables are reused across calls to avoid per-record allocation —
  // the standard Hadoop mapper idiom.
  private val one = new IntWritable(1)
  private val word = new Text()

  def map(key: LongWritable, value: Text,
          output: OutputCollector[Text, IntWritable],
          reporter: Reporter
  ) {
    val tokens = new StringTokenizer(value.toString)
    while (tokens.hasMoreTokens) {
      word.set(tokens.nextToken)
      output.collect(word, one)
    }
  }
}
/** Reducer (also used as combiner): sums all counts for a key and emits a
 *  single (key, total) pair (old `mapred` API; `values` is a java.util.Iterator).
 */
class Reduce extends MapReduceBase
    with Reducer[Text, IntWritable, Text, IntWritable] {

  def reduce(key: Text, values: Iterator[IntWritable],
             output: OutputCollector[Text, IntWritable],
             reporter: Reporter
  ) {
    // Helper defined before its call site (the original defined it after,
    // which is legal in a block but confusing). @tailrec makes the compiler
    // guarantee constant stack space even for keys with very many values.
    @scala.annotation.tailrec
    def count(sum: Int, vs: Iterator[IntWritable]): Int =
      if (vs.hasNext)
        count(sum + vs.next.get, vs)
      else
        sum

    output.collect(key, new IntWritable(count(0, values)))
  }
}
/** Job driver: wires Map/Reduce into a JobConf and runs the word-count job.
 *  Expects exactly two arguments: the input path and the output path.
 */
object WordCount {
  def main(args: Array[String]) {
    // Fail fast with a usage message instead of letting args(0)/args(1)
    // throw ArrayIndexOutOfBoundsException when paths are missing.
    if (args.length != 2) {
      System.err.println("Usage: WordCount <input_path> <output_path>")
      System.exit(1)
    }

    val conf = new JobConf(this.getClass)
    conf.setJobName("WordCount")

    conf.setOutputKeyClass(classOf[Text])
    conf.setOutputValueClass(classOf[IntWritable])

    conf.setMapperClass(classOf[Map])
    // Word count's reduce is associative and commutative, so the reducer
    // doubles as a combiner to cut shuffle traffic.
    conf.setCombinerClass(classOf[Reduce])
    conf.setReducerClass(classOf[Reduce])

    conf.setInputFormat(classOf[TextInputFormat])
    conf.setOutputFormat(classOf[TextOutputFormat[Text, IntWritable]])

    FileInputFormat.setInputPaths(conf, new Path(args(0)))
    FileOutputFormat.setOutputPath(conf, new Path(args(1)))

    JobClient.runJob(conf)
  }
}