分享一下我老师大神的人工智能教程!零基础,通俗易懂!http://blog.csdn.net/jiangjunshow
也欢迎大家转载本篇文章。分享知识,造福人民,实现我们中华民族伟大复兴!
一,编程实现以下指定功能,并利用Hadoop提供的Shell命令完成相同任务:
向HDFS中上传任意文本文件,如果指定的文件在HDFS中已经存在,由用户指定是追加到原有文件末尾还是覆盖原有的文件
Shell命令实现:1.先到Hadoop主文件夹cd /usr/local/hadoop
2.启动Hadoop服务
sbin/start-dfs.sh
sbin/start-yarn.sh
3.创建两个任意文本文件用于实验
echo "hello world" > local.txt
echo "hello hadoop" > text.txt
4.创建用户工作目录(HDFS默认工作目录格式为/user/当前用户)
hadoop fs -mkdir -p /user/当前用户名
5.检查文件是否存在
hadoop fs -test -e text.txt
echo $?
6.上传本地文件到HDFS系统hadoop fs -put text.txt
7.追加到文件末尾的指令hadoop fs -appendToFile local.txt text.txt
8.查看HDFS文件的内容hadoop fs -cat text.txt
9.覆盖原有文件的指令(覆盖之后再执行一遍上一步)hadoop fs -copyFromLocal -f local.txt text.txt
10.以上步骤也可以用如下命令实现
if $(hadoop fs -test -e text.txt);then $(hadoop fs -appendToFile local.txt text.txt);else $(hadoop fs -copyFromLocal -f local.txt text.txt);fi
package cn.edu.zucc.hdfs;import java.io.FileInputStream;import java.io.IOException;import org.apache.hadoop.conf.Configuration;import org.apache.hadoop.fs.FSDataOutputStream;import org.apache.hadoop.fs.FileSystem;import org.apache.hadoop.fs.Path;public class CopyFromLocalFile {/** * 判断路径是否存在 */public static boolean test(Configuration conf, String path) { try (FileSystem fs = FileSystem.get(conf)) { return fs.exists(new Path(path));
} catch (IOException e) { e.printStackTrace(); return false; } } /** * 复制文件到指定路径 若路径已存在,则进行覆盖 */ public static void copyFromLocalFile(Configuration conf, String localFilePath, String remoteFilePath) { Path localPath = new Path(localFilePath); Path remotePath = new Path(remoteFilePath); try (FileSystem fs = FileSystem.get(conf)) { /* fs.copyFromLocalFile 第一个参数表示是否删除源文件,第二个参数表示是否覆盖 */ fs.copyFromLocalFile(false, true, localPath, remotePath); } catch (IOException e) { e.printStackTrace(); } } /** * 追加文件内容 */ public static void appendToFile(Configuration conf, String localFilePath, String remoteFilePath) { Path remotePath = new Path(remoteFilePath); try (FileSystem fs = FileSystem.get(conf); FileInputStream in = new FileInputStream(localFilePath);) { FSDataOutputStream out = fs.append(remotePath); byte[] data = new byte[1024]; int read = -1; while ((read = in.read(data)) > 0) { out.write(data, 0, read); } out.close(); } catch (IOException e) { e.printStackTrace(); } } /** * 主函数 */ public static void main(String[] args) { Configuration conf = new Configuration(); conf.set("fs.defaultFS", "hdfs://localhost:9000"); String localFilePath = "/usr/local/hadoop/text.txt"; // 本地路径 String remoteFilePath = "/user/tiny/text.txt"; // HDFS路径 // String choice = "append"; // 若文件存在则追加到文件末尾 String choice = "overwrite"; // 若文件存在则覆盖 try { /* 判断文件是否存在 */ boolean fileExists = false; if (CopyFromLocalFile.test(conf, remoteFilePath)) { fileExists = true; System.out.println(remoteFilePath + " 已存在."); } else { System.out.println(remoteFilePath + " 不存在."); } /* 进行处理 */ if (!fileExists) { // 文件不存在,则上传 CopyFromLocalFile.copyFromLocalFile(conf, localFilePath, remoteFilePath); System.out.println(localFilePath + " 已上传至 " + remoteFilePath); } else if (choice.equals("overwrite")) { // 选择覆盖 CopyFromLocalFile.copyFromLocalFile(conf, localFilePath, remoteFilePath); System.out.println(localFilePath + " 已覆盖 " + remoteFilePath); } else if (choice.equals("append")) { // 选择追加 
CopyFromLocalFile.appendToFile(conf, localFilePath, remoteFilePath); System.out.println(localFilePath + " 已追加至 " + remoteFilePath); } } catch (Exception e) { e.printStackTrace(); } }}
从HDFS中下载指定文件,如果本地文件与要下载的文件名称相同,则自动对下载的文件重命名。
Shell命令实现:if [ -e ./text.txt ]; then hadoop fs -copyToLocal text.txt ./text2.txt; else hadoop fs -copyToLocal text.txt ./text.txt; fi
package cn.edu.zucc.hdfs;import org.apache.hadoop.conf.Configuration;import org.apache.hadoop.fs.*;import org.apache.hadoop.fs.FileSystem;import java.io.*;public class CopyToLocal { /** * 下载文件到本地 判断本地路径是否已存在,若已存在,则自动进行重命名 */ public static void copyToLocal(Configuration conf, String remoteFilePath, String localFilePath) { Path remotePath = new Path(remoteFilePath); try (FileSystem fs = FileSystem.get(conf)) { File f = new File(localFilePath); /* 如果文件名存在,自动重命名(在文件名后面加上 _0, _1 ...) */ if (f.exists()) { System.out.println(localFilePath + " 已存在."); Integer i = Integer.valueOf(0); while (true) { f = new File(localFilePath + "_" + i.toString()); if (!f.exists()) { localFilePath = localFilePath + "_" + i.toString(); break; } else { i++; continue; } } System.out.println("将重新命名为: " + localFilePath); } // 下载文件到本地
向HDFS中上传任意文本文件,如果指定的文件在HDFS中已经存在,由用户指定是追加到原有文件末尾还是覆盖原有的文件
Shell命令实现:1.先到Hadoop主文件夹cd /usr/local/hadoop
2.启动Hadoop服务
sbin/start-dfs.sh
sbin/start-yarn.sh
3.创建两个任意文本文件用于实验
echo "hello world" > local.txt
echo "hello hadoop" > text.txt
4.创建用户工作目录(HDFS默认工作目录格式为/user/当前用户)
hadoop fs -mkdir -p /user/当前用户名
5.检查文件是否存在
hadoop fs -test -e text.txt
echo $?
6.上传本地文件到HDFS系统hadoop fs -put text.txt
7.追加到文件末尾的指令hadoop fs -appendToFile local.txt text.txt
8.查看HDFS文件的内容hadoop fs -cat text.txt
9.覆盖原有文件的指令(覆盖之后再执行一遍上一步)hadoop fs -copyFromLocal -f local.txt text.txt
10.以上步骤也可以用如下命令实现
if $(hadoop fs -test -e text.txt);then $(hadoop fs -appendToFile local.txt text.txt);else $(hadoop fs -copyFromLocal -f local.txt text.txt);fi
package cn.edu.zucc.hdfs;import java.io.FileInputStream;import java.io.IOException;import org.apache.hadoop.conf.Configuration;import org.apache.hadoop.fs.FSDataOutputStream;import org.apache.hadoop.fs.FileSystem;import org.apache.hadoop.fs.Path;public class CopyFromLocalFile { /** * 判断路径是否存在 */ public static boolean test(Configuration conf, String path) { try (FileSystem fs = FileSystem.get(conf)) { return fs.exists(new Path(path)); } catch (IOException e) { e.printStackTrace(); return false; } } /** * 复制文件到指定路径 若路径已存在,则进行覆盖 */ public static void copyFromLocalFile(Configuration conf, String localFilePath, String remoteFilePath) { Path localPath = new Path(localFilePath); Path remotePath = new Path(remoteFilePath); try (FileSystem fs = FileSystem.get(conf)) { /* fs.copyFromLocalFile 第一个参数表示是否删除源文件,第二个参数表示是否覆盖 */ fs.copyFromLocalFile(false, true, localPath, remotePath); } catch (IOException e) { e.printStackTrace(); } } /** * 追加文件内容 */ public static void appendToFile(Configuration conf, String localFilePath, String remoteFilePath) { Path remotePath = new Path(remoteFilePath); try (FileSystem fs = FileSystem.get(conf); FileInputStream in = new FileInputStream(localFilePath);) { FSDataOutputStream out = fs.append(remotePath); byte[] data = new byte[1024]; int read = -1; while ((read = in.read(data)) > 0) { out.write(data, 0, read); } out.close(); } catch (IOException e) { e.printStackTrace(); } } /** * 主函数 */ public static void main(String[] args) { Configuration conf = new Configuration(); conf.set("fs.defaultFS", "hdfs://localhost:9000"); String localFilePath = "/usr/local/hadoop/text.txt"; // 本地路径 String remoteFilePath = "/user/tiny/text.txt"; // HDFS路径 // String choice = "append"; // 若文件存在则追加到文件末尾 String choice = "overwrite"; // 若文件存在则覆盖 try { /* 判断文件是否存在 */ boolean fileExists = false; if (CopyFromLocalFile.test(conf, remoteFilePath)) { fileExists = true; System.out.println(remoteFilePath + " 已存在."); } else { System.out.println(remoteFilePath + " 不存在."); } /* 进行处理 
*/ if (!fileExists) { // 文件不存在,则上传 CopyFromLocalFile.copyFromLocalFile(conf, localFilePath, remoteFilePath); System.out.println(localFilePath + " 已上传至 " + remoteFilePath); } else if (choice.equals("overwrite")) { // 选择覆盖 CopyFromLocalFile.copyFromLocalFile(conf, localFilePath, remoteFilePath); System.out.println(localFilePath + " 已覆盖 " + remoteFilePath); } else if (choice.equals("append")) { // 选择追加 CopyFromLocalFile.appendToFile(conf, localFilePath, remoteFilePath); System.out.println(localFilePath + " 已追加至 " + remoteFilePath); } } catch (Exception e) { e.printStackTrace(); } }}
从HDFS中下载指定文件,如果本地文件与要下载的文件名称相同,则自动对下载的文件重命名。
Shell命令实现:if [ -e ./text.txt ]; then hadoop fs -copyToLocal text.txt ./text2.txt; else hadoop fs -copyToLocal text.txt ./text.txt; fi
package cn.edu.zucc.hdfs;import org.apache.hadoop.conf.Configuration;import org.apache.hadoop.fs.*;import org.apache.hadoop.fs.FileSystem;import java.io.*;public class CopyToLocal { /** * 下载文件到本地 判断本地路径是否已存在,若已存在,则自动进行重命名 */ public static void copyToLocal(Configuration conf, String remoteFilePath, String localFilePath) { Path remotePath = new Path(remoteFilePath); try (FileSystem fs = FileSystem.get(conf)) { File f = new File(localFilePath); /* 如果文件名存在,自动重命名(在文件名后面加上 _0, _1 ...) */ if (f.exists()) { System.out.println(localFilePath + " 已存在."); Integer i = Integer.valueOf(0); while (true) { f = new File(localFilePath + "_" + i.toString()); if (!f.exists()) { localFilePath = localFilePath + "_" + i.toString(); break; } else { i++; continue; } } System.out.println("将重新命名为: " + localFilePath); } // 下载文件到本地 Path localPath = new Path(localFilePath); fs.copyToLocalFile(remotePath, localPath); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } } /** * 主函数 */ public static void main(String[] args) { Configuration conf = new Configuration(); conf.set("fs.defaultFS", "hdfs://localhost:9000"); String localFilePath = "/usr/local/hadoop/text.txt"; // 本地路径 String remoteFilePath = "/user/tiny/text.txt"; // HDFS路径 try { CopyToLocal.copyToLocal(conf, remoteFilePath, localFilePath); System.out.println("下载完成"); } catch (Exception e) { e.printStackTrace(); } }}
http://www.jianshu.com/p/0663d74b79b5
Path localPath = new Path(localFilePath); fs.copyToLocalFile(remotePath, localPath); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } } /** * 主函数 */ public static void main(String[] args) { Configuration conf = new Configuration(); conf.set("fs.defaultFS", "hdfs://localhost:9000"); String localFilePath = "/usr/local/hadoop/text.txt"; // 本地路径 String remoteFilePath = "/user/tiny/text.txt"; // HDFS路径 try { CopyToLocal.copyToLocal(conf, remoteFilePath, localFilePath); System.out.println("下载完成"); } catch (Exception e) { e.printStackTrace(); } }} http://www.jianshu.com/p/0663d74b79b5