我编写了下面这个Java Hadoop程序,用于对文件进行并行索引。该源文件是在Eclipse中创建的:
package org.myorg;
import java.io.*;
import java.util.*;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.conf.*;
import org.apache.hadoop.io.*;
import org.apache.hadoop.mapred.*;
import org.apache.hadoop.util.*;
public class ParallelIndexation {
public static class Map extends MapReduceBase implements Mapper<LongWritable, Text, Text, IntWritable> {
private final static IntWritable zero = new IntWritable(0);
private Text word = new Text();
public void map(LongWritable key, Text value, OutputCollector<Text, IntWritable> output, Reporter reporter) throws IOException {
String line = value.toString();
int CountComputers;
//DataInputStream ConfigFile = new DataInputStream( new FileInputStream("countcomputers.txt"));
FileInputStream fstream = new FileInputStream("/usr/countcomputers.txt"); // путь к файлу
DataInputStream in = new DataInputStream(fstream);
BufferedReader br = new BufferedReader(new InputStreamReader(in));
String result = br.readLine(); // читаем как строку
CountComputers = Integer.parseInt(result); // переводим строку в число
//CountComputers=ConfigFile.readInt();
in.close();
fstream.close();
ArrayList<String> paths = new ArrayList<String>();
StringTokenizer tokenizer = new StringTokenizer(line, "\n");
while (tokenizer.hasMoreTokens())
{
paths.add(tokenizer.nextToken());
}
String[] ConcatPaths= new String[CountComputers];
int NumberOfElementConcatPaths=0;
if (paths.size()%CountComputers==0)
{
for (int i=0; i<CountComputers; i++)
{
ConcatPaths[i]=paths.get(NumberOfElementConcatPaths);
NumberOfElementConcatPaths+=paths.size()/CountComputers;
for (int j=1; j<paths.size()/CountComputers; j++)
{
ConcatPaths[i]+="\n"+paths.get(i*paths.size()/CountComputers+j);
}
}
}
else
{
NumberOfElementConcatPaths=0;
for (int i=0; i<paths.size()%CountComputers; i++)
{
ConcatPaths[i]=paths.get(NumberOfElementConcatPaths);
NumberOfElementConcatPaths+=paths.size()/CountComputers+1;
for (int j=1; j<paths.size()/CountComputers+1; j++)
{
ConcatPaths[i]+="\n"+paths.get(i*(paths.size()/CountComputers+1)+j);
}
}
for (int k=paths.size()%CountComputers; k<CountComputers; k++)
{
ConcatPaths[k]=paths.get(NumberOfElementConcatPaths);
NumberOfElementConcatPaths+=paths.size()/CountComputers;
for (int j=1; j<paths.size()/CountComputers; j++)
{
ConcatPaths[k]+="\n"+paths.get((k-paths.size()%CountComputers)*paths.size()/CountComputers+paths.size()%CountComputers*(paths.size()/CountComputers+1)+j);
}
}
}
//CountComputers=ConfigFile.readInt();
for (int i=0; i<ConcatPaths.length; i++)
{
word.set(ConcatPaths[i]);
output.collect(word, zero);
}
}
}
public static class Reduce extends MapReduceBase implements Reducer<Text, IntWritable, Text, IntWritable> {
public native long Traveser(String Path);
public native void Configure(String Path);
public void reduce(Text key, IntWritable value, OutputCollector<Text, LongWritable> output, Reporter reporter) throws IOException {
long count;
String line = key.toString();
ArrayList<String> ProcessedPaths = new ArrayList<String>();
StringTokenizer tokenizer = new StringTokenizer(line, "\n");
while (tokenizer.hasMoreTokens())
{
ProcessedPaths.add(tokenizer.nextToken());
}
Configure("/etc/nsindexer.conf");
for (int i=0; i<ProcessedPaths.size(); i++)
{
count=Traveser(ProcessedPaths.get(i));
}
output.collect(key, new LongWritable(count));
}
static
{
System.loadLibrary("nativelib");
}
}
public static void main(String[] args) throws Exception {
JobConf conf = new JobConf(ParallelIndexation.class);
conf.setJobName("parallelindexation");
conf.setOutputKeyClass(Text.class);
conf.setOutputValueClass(LongWritable.class);
conf.setMapperClass(Map.class);
conf.setCombinerClass(Reduce.class);
conf.setReducerClass(Reduce.class);
conf.setInputFormat(TextInputFormat.class);
conf.setOutputFormat(TextOutputFormat.class);
FileInputFormat.setInputPaths(conf, new Path(args[0]));
FileOutputFormat.setOutputPath(conf, new Path(args[1]));
JobClient.runJob(conf);
}
}
在Nexenta Illumos操作系统(Solaris)中使用以下命令进行编译:
javac -classpath hadoop-examples-1.0.1.jar -d folder/classes folder/src/ParallelIndexation.java
结果收到以下错误:
folder/src/ParallelIndexation.java:23: error: unmappable character for encoding UTF8
FileInputStream fstream = new FileInputStream("/usr/countcomputers.txt"); // ���� � �����
^
folder/src/ParallelIndexation.java:23: error: unmappable character for encoding UTF8
FileInputStream fstream = new FileInputStream("/usr/countcomputers.txt"); // ���� � �����
^
folder/src/ParallelIndexation.java:23: error: unmappable character for encoding UTF8
FileInputStream fstream = new FileInputStream("/usr/countcomputers.txt"); // ���� � �����
^
folder/src/ParallelIndexation.java:23: error: unmappable character for encoding UTF8
FileInputStream fstream = new FileInputStream("/usr/countcomputers.txt"); // ���� � �����
^
folder/src/ParallelIndexation.java:23: error: unmappable character for encoding UTF8
FileInputStream fstream = new FileInputStream("/usr/countcomputers.txt"); // ���� � �����
^
folder/src/ParallelIndexation.java:23: error: unmappable character for encoding UTF8
FileInputStream fstream = new FileInputStream("/usr/countcomputers.txt"); // ���� � �����
^
folder/src/ParallelIndexation.java:23: error: unmappable character for encoding UTF8
FileInputStream fstream = new FileInputStream("/usr/countcomputers.txt"); // ���� � �����
^
folder/src/ParallelIndexation.java:23: error: unmappable character for encoding UTF8
FileInputStream fstream = new FileInputStream("/usr/countcomputers.txt"); // ���� � �����
^
folder/src/ParallelIndexation.java:23: error: unmappable character for encoding UTF8
FileInputStream fstream = new FileInputStream("/usr/countcomputers.txt"); // ���� � �����
^
folder/src/ParallelIndexation.java:23: error: unmappable character for encoding UTF8
FileInputStream fstream = new FileInputStream("/usr/countcomputers.txt"); // ���� � �����
^
folder/src/ParallelIndexation.java:26: error: unmappable character for encoding UTF8
String result = br.readLine(); // ������ ��� ������
^
folder/src/ParallelIndexation.java:26: error: unmappable character for encoding UTF8
String result = br.readLine(); // ������ ��� ������
^
folder/src/ParallelIndexation.java:26: error: unmappable character for encoding UTF8
String result = br.readLine(); // ������ ��� ������
^
folder/src/ParallelIndexation.java:26: error: unmappable character for encoding UTF8
String result = br.readLine(); // ������ ��� ������
^
folder/src/ParallelIndexation.java:26: error: unmappable character for encoding UTF8
String result = br.readLine(); // ������ ��� ������
^
folder/src/ParallelIndexation.java:26: error: unmappable character for encoding UTF8
String result = br.readLine(); // ������ ��� ������
^
folder/src/ParallelIndexation.java:26: error: unmappable character for encoding UTF8
String result = br.readLine(); // ������ ��� ������
^
folder/src/ParallelIndexation.java:26: error: unmappable character for encoding UTF8
String result = br.readLine(); // ������ ��� ������
^
folder/src/ParallelIndexation.java:26: error: unmappable character for encoding UTF8
String result = br.readLine(); // ������ ��� ������
^
folder/src/ParallelIndexation.java:26: error: unmappable character for encoding UTF8
String result = br.readLine(); // ������ ��� ������
^
folder/src/ParallelIndexation.java:26: error: unmappable character for encoding UTF8
String result = br.readLine(); // ������ ��� ������
^
folder/src/ParallelIndexation.java:26: error: unmappable character for encoding UTF8
String result = br.readLine(); // ������ ��� ������
^
folder/src/ParallelIndexation.java:26: error: unmappable character for encoding UTF8
String result = br.readLine(); // ������ ��� ������
^
folder/src/ParallelIndexation.java:26: error: unmappable character for encoding UTF8
String result = br.readLine(); // ������ ��� ������
^
folder/src/ParallelIndexation.java:26: error: unmappable character for encoding UTF8
String result = br.readLine(); // ������ ��� ������
^
folder/src/ParallelIndexation.java:27: error: unmappable character for encoding UTF8
CountComputers = Integer.parseInt(result); // ��������� ������ � �����
^
folder/src/ParallelIndexation.java:27: error: unmappable character for encoding UTF8
CountComputers = Integer.parseInt(result); // ��������� ������ � �����
^
folder/src/ParallelIndexation.java:27: error: unmappable character for encoding UTF8
CountComputers = Integer.parseInt(result); // ��������� ������ � �����
^
folder/src/ParallelIndexation.java:27: error: unmappable character for encoding UTF8
CountComputers = Integer.parseInt(result); // ��������� ������ � �����
^
folder/src/ParallelIndexation.java:27: error: unmappable character for encoding UTF8
CountComputers = Integer.parseInt(result); // ��������� ������ � �����
^
folder/src/ParallelIndexation.java:27: error: unmappable character for encoding UTF8
CountComputers = Integer.parseInt(result); // ��������� ������ � �����
^
folder/src/ParallelIndexation.java:27: error: unmappable character for encoding UTF8
CountComputers = Integer.parseInt(result); // ��������� ������ � �����
^
folder/src/ParallelIndexation.java:27: error: unmappable character for encoding UTF8
CountComputers = Integer.parseInt(result); // ��������� ������ � �����
^
folder/src/ParallelIndexation.java:27: error: unmappable character for encoding UTF8
CountComputers = Integer.parseInt(result); // ��������� ������ � �����
^
folder/src/ParallelIndexation.java:27: error: unmappable character for encoding UTF8
CountComputers = Integer.parseInt(result); // ��������� ������ � �����
^
folder/src/ParallelIndexation.java:27: error: unmappable character for encoding UTF8
CountComputers = Integer.parseInt(result); // ��������� ������ � �����
^
folder/src/ParallelIndexation.java:27: error: unmappable character for encoding UTF8
CountComputers = Integer.parseInt(result); // ��������� ������ � �����
^
folder/src/ParallelIndexation.java:27: error: unmappable character for encoding UTF8
CountComputers = Integer.parseInt(result); // ��������� ������ � �����
^
folder/src/ParallelIndexation.java:27: error: unmappable character for encoding UTF8
CountComputers = Integer.parseInt(result); // ��������� ������ � �����
^
folder/src/ParallelIndexation.java:27: error: unmappable character for encoding UTF8
CountComputers = Integer.parseInt(result); // ��������� ������ � �����
^
folder/src/ParallelIndexation.java:27: error: unmappable character for encoding UTF8
CountComputers = Integer.parseInt(result); // ��������� ������ � �����
^
folder/src/ParallelIndexation.java:27: error: unmappable character for encoding UTF8
CountComputers = Integer.parseInt(result); // ��������� ������ � �����
^
folder/src/ParallelIndexation.java:27: error: unmappable character for encoding UTF8
CountComputers = Integer.parseInt(result); // ��������� ������ � �����
^
folder/src/ParallelIndexation.java:27: error: unmappable character for encoding UTF8
CountComputers = Integer.parseInt(result); // ��������� ������ � �����
^
folder/src/ParallelIndexation.java:27: error: unmappable character for encoding UTF8
CountComputers = Integer.parseInt(result); // ��������� ������ � �����
^
folder/src/ParallelIndexation.java:27: error: unmappable character for encoding UTF8
CountComputers = Integer.parseInt(result); // ��������� ������ � �����
^
46 errors
如何在Eclipse中将文件编码更改为UTF-8?
最佳答案
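在Eclipse中,可以在三个位置设置文本文件编码:
1. 工作空间级别:Window > Preferences > General > Workspace > Text file encoding;
2. 项目级别:右键单击项目 > Properties > Resource > Text file encoding;
3. 文件级别:右键单击文件 > Properties > Resource > Text file encoding。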
我建议将其设置在工作空间和项目级别,并且仅在必要时在文件级别设置。
在Eclipse中设置文件编码并不会真正转换文件本身的内容。要转换已有文件,您可能需要使用单独的工具(例如 iconv)。