一:需要的jar包:
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>com.cl</groupId>
    <artifactId>hadoop</artifactId>
    <version>1.0-SNAPSHOT</version>

    <url>http://maven.apache.org</url>

    <properties>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
    </properties>

    <dependencies>
        <!-- JUnit: test-scope only, used by the @Before/@Test client examples -->
        <dependency>
            <groupId>junit</groupId>
            <artifactId>junit</artifactId>
            <version>4.11</version>
            <scope>test</scope>
        </dependency>
        <!-- Hadoop distributed file system (HDFS) client library -->
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-hdfs</artifactId>
            <version>2.8.3</version>
        </dependency>
        <!-- Hadoop common library (Configuration, FileSystem, Path, IOUtils, ...) -->
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-common</artifactId>
            <version>2.8.3</version>
        </dependency>
    </dependencies>
</project>
二:连接HDFS和客户端
1. public class HdfsUtil {
2. public static void main(String[] args) throws IOException, InterruptedException, URISyntaxException {
3. // 构造一个配置参数封装对象
4. new Configuration();
5. // 构造一个hdfs的客户端
6. new URI("hdfs://192.168.10.252:9000"), conf, "root");
7. // 用hdfs文件系统的客户端对象fs来操作文件,比如上传一个文件
8. new Path("/Users/cl/Downloads/docker"), new Path("/"));
9. fs.close();
10. }
11. }
三:Java客户端基本操作:
1. import org.apache.hadoop.conf.Configuration;
2. import org.apache.hadoop.fs.*;
3. import org.junit.Before;
4. import org.junit.Test;
5.
6. import java.io.FileNotFoundException;
7. import java.io.IOException;
8. import java.net.URI;
9. import java.net.URISyntaxException;
10.
11.
12. public class HDFSUtil {
13. null;
14.
15. @Before
16. public void init() throws IOException, InterruptedException, URISyntaxException {
17. // 构造一个配置参数封装对象
18. new Configuration();
19. // 构造一个hdfs的客户端
20. new URI("http://192.168.10.252:9000"), conf, "root");
21. }
22.
23. /*
24. * 从本地上传文件到hdfs中
25. */
26. @Test
27. public void testUpload() throws IllegalArgumentException, IOException {
28. new Path("/Users/cl/Downloads/docker"), new Path("/"));
29. fs.close();
30. }
31.
32. /*
33. * 从hdfs中下载文件到本地
34. */
35. @Test
36. public void testDownload() throws IllegalArgumentException, IOException {
37. false, new Path("/docker"), new Path("/Users/cl/Downloads/"), true);
38. fs.close();
39. }
40.
41. /*
42. * 文件夹操作
43. */
44. @Test
45. public void testDir() throws IllegalArgumentException, IOException {
46. new Path("/aaa"));
47. "创建了一个文件夹:/aaa");
48.
49. boolean exists = fs.exists(new Path("/aaa"));
50. "/aaa文件夹存在否?" + exists);
51.
52. new Path("/Users/cl/Downloads/input.txt"), new Path("/aaa"));
53. "成功上传了一个文件到/aaa目录下");
54.
55. new Path("/aaa"), true);
56. "已经将/aaa目录删除");
57.
58. boolean exists2 = fs.exists(new Path("/aaa"));
59. "/aaa文件夹存在否?" + exists2);
60. fs.close();
61. }
62.
63. /*
64. * 文件信息查看
65. */
66. @Test
67. public void testFileStatus() throws FileNotFoundException, IllegalArgumentException, IOException {
68. //只能列出文件信息
69. new Path("/"), true);
70. while (listFiles.hasNext()) {
71. LocatedFileStatus fileStatus = listFiles.next();
72. System.out.println(fileStatus.getPath().getName());
73. }
74.
75. "-----------------------");
76. //能列出文件和文件夹信息
77. new Path("/"));
78. for (FileStatus f : listStatus) {
79. "-";
80. if (f.isDirectory()) type = "d";
81. "\t" + f.getPath().getName());
82. }
83. fs.close();
84. }
85.
86. @Test
87. public void testOthers() throws IllegalArgumentException, IOException {
88. //文件偏移量信息
89. new Path("/docker"), 0, 143588167);
90. for (BlockLocation location : fileBlockLocations) {
91. System.out.println(location.getOffset());
92. 0]);
93. }
94.
95. //修改文件名
96. new Path("/docker"), new Path("/docker.tgz"));
97.
98. //修改一个文件的副本数量
99. new Path("/docker.tgz"), (short) 2);
100. fs.close();
101. }
102. }
四:Java客户端IO流操作:
1. import java.io.FileInputStream;
2. import java.io.FileOutputStream;
3. import java.io.IOException;
4. import java.net.URI;
5. import java.net.URISyntaxException;
6.
7. import org.apache.hadoop.conf.Configuration;
8. import org.apache.hadoop.fs.FSDataInputStream;
9. import org.apache.hadoop.fs.FSDataOutputStream;
10. import org.apache.hadoop.fs.FileSystem;
11. import org.apache.hadoop.fs.Path;
12. import org.apache.hadoop.io.IOUtils;
13. import org.junit.Before;
14. import org.junit.Test;
15.
16.
17. public class HdfsIO {
18. null;
19.
20. @Before
21. public void init() throws IOException, InterruptedException, URISyntaxException{
22. // 构造一个配置参数封装对象
23. new Configuration();
24. // 构造一个hdfs的客户端
25. new URI("hdfs://192.168.10.252:9000"), conf, "root");
26. }
27.
28. /*
29. * 下载文件
30. */
31. @Test
32. public void testDownload() throws IllegalArgumentException, IOException{
33. new Path("/docker"));
34. new FileOutputStream("/Users/cl/Downloads/docker");
35. new Configuration());
36. IOUtils.closeStream(in);
37. IOUtils.closeStream(out);
38. fs.close();
39. }
40.
41. /*
42. * 上传文件
43. */
44. @Test
45. public void testUpload() throws IllegalArgumentException, IOException{
46. new FileInputStream("/Users/cl/Downloads/docker");
47. new Path("/docker"));
48. new Configuration());
49. IOUtils.closeStream(in);
50. IOUtils.closeStream(out);
51. fs.close();
52. }
53.
54. /*
55. * 从指定偏移量读取hdfs中的文件数据
56. * 在分布式数据处理时,可以将数据分片来分配给不同的节点处理
57. */
58. @Test
59. public void testSeek() throws IllegalArgumentException, IOException{
60. new Path("/docker"));
61. 6);//定位,设置起始偏移量
62. new FileOutputStream("/Users/cl/Downloads/docker");
63. new Configuration());
64. IOUtils.closeStream(in);
65. IOUtils.closeStream(out);
66. fs.close();
67. }
68. }
69.