How can I connect to Hadoop from a Java program? Here are some details: I take input from the user through an HTML form and process the form data with JSP. I want to connect to Hadoop to fetch some data based on the form input. In this scenario, how do I connect to Hadoop using Java?
Answer 0 (score: 2)
It depends on what you mean by Hadoop. Hadoop can store data in several ways: as plain files in HDFS (the Hadoop Distributed File System), or as tables in Hive or HBase. Here is a minimal example that reads a file from HDFS:
import org.apache.commons.io.IOUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;

public class HdfsFileReader {

    // nameNodeHost = localhost if you run Hadoop in local mode
    private static final String NAME_NODE = "hdfs://nameNodeHost:8020";

    public static void main(String[] args) throws URISyntaxException, IOException {
        String fileInHdfs = args[0];
        // Connect to the name node and read the whole file as a UTF-8 string
        FileSystem fs = FileSystem.get(new URI(NAME_NODE), new Configuration());
        String fileContent = IOUtils.toString(fs.open(new Path(fileInHdfs)), "UTF-8");
        System.out.println("File content - " + fileContent);
    }
}
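Since the question mentions an HTML form handled by JSP, here is a minimal sketch of wiring that same read into a servlet. The URL mapping, the form field name "hdfsPath", and the name node address are all assumptions to adapt to your setup, and the servlet API is assumed to be provided by your container:

import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import javax.servlet.ServletException;
import javax.servlet.annotation.WebServlet;
import javax.servlet.http.HttpServlet;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
import org.apache.commons.io.IOUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

@WebServlet("/hdfs-read")
public class HdfsReadServlet extends HttpServlet {

    // Assumption: same name node address as in the example above.
    private static final String NAME_NODE = "hdfs://nameNodeHost:8020";

    @Override
    protected void doPost(HttpServletRequest req, HttpServletResponse resp)
            throws ServletException, IOException {
        // "hdfsPath" is a hypothetical form field name; match it to your JSP form.
        String fileInHdfs = req.getParameter("hdfsPath");
        try (FileSystem fs = FileSystem.get(new URI(NAME_NODE), new Configuration())) {
            String content = IOUtils.toString(fs.open(new Path(fileInHdfs)), "UTF-8");
            resp.setContentType("text/plain;charset=UTF-8");
            resp.getWriter().print(content);
        } catch (URISyntaxException e) {
            throw new ServletException(e);
        }
    }
}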
The Maven dependencies you need:
<dependency>
    <groupId>commons-io</groupId>
    <artifactId>commons-io</artifactId>
    <version>2.4</version>
</dependency>
<dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-common</artifactId>
    <version>2.6.0</version>
</dependency>
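If the data lives in Hive rather than in raw HDFS files, you can query it over JDBC instead. A hedged sketch: the HiveServer2 URL, the credentials, and the table name "my_table" are all assumptions, and you would add the org.apache.hive:hive-jdbc dependency:

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.Statement;

public class HiveTableReader {

    // Assumption: HiveServer2 listens on its default port 10000.
    private static final String HIVE_URL = "jdbc:hive2://nameNodeHost:10000/default";

    public static void main(String[] args) throws Exception {
        try (Connection conn = DriverManager.getConnection(HIVE_URL, "hive", "");
             Statement stmt = conn.createStatement();
             // "my_table" is a placeholder; query whatever your form input selects.
             ResultSet rs = stmt.executeQuery("SELECT * FROM my_table LIMIT 10")) {
            while (rs.next()) {
                System.out.println(rs.getString(1));
            }
        }
    }
}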
Answer 1 (score: 0)
This code targets the Cloudera quickstart Docker image. It pushes a file from the local file system to HDFS. It needs to be exported as a jar file and run on the command line.
Example: java -jar connect_hdfs.jar /local_file.txt push /hdfs_dir_location/
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

import java.io.IOException;
import java.net.URISyntaxException;

public class Main {

    private static final String NAME_NODE = "hdfs://quickstart.cloudera:8020";

    public static void main(String[] args) throws URISyntaxException, IOException {
        if (args.length != 3) {
            throw new IllegalArgumentException("Must include inputs: source file location, action "
                    + "(push or pull), and target file location");
        }
        String sourceLocation = args[0];
        String action = args[1];
        String targetLocation = args[2];

        Configuration configuration = new Configuration();
        configuration.set("fs.defaultFS", NAME_NODE);
        // Pin the scheme-to-implementation mappings explicitly so an exported
        // jar still resolves hdfs:// and file:// even if the META-INF service
        // files were lost when the jar was assembled.
        configuration.set("fs.hdfs.impl",
                org.apache.hadoop.hdfs.DistributedFileSystem.class.getName());
        configuration.set("fs.file.impl",
                org.apache.hadoop.fs.LocalFileSystem.class.getName());

        FileSystem hdfsFileSystem = FileSystem.get(configuration);
        if (action.equals("push")) {
            // Copy from the local file system into HDFS.
            hdfsFileSystem.copyFromLocalFile(new Path(sourceLocation), new Path(targetLocation));
        } else if (action.equals("pull")) {
            // Copy from HDFS to the local file system, using the raw local FS
            // (no .crc checksum files alongside the output).
            hdfsFileSystem.copyToLocalFile(false, new Path(sourceLocation), new Path(targetLocation), true);
        } else {
            throw new IllegalArgumentException("Action must be push or pull");
        }
    }
}
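If pushes fail with an HDFS permission error, note that HDFS trusts the client-side user name by default. FileSystem.get has a three-argument overload that lets you run the request as a named user; a hedged variant follows, where "cloudera" is an assumption matching the quickstart image's default account:

import java.net.URI;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class PushAsUser {
    public static void main(String[] args) throws Exception {
        // The third argument is the user the request runs as; substitute the
        // owner of the target HDFS directory ("cloudera" is an assumption).
        FileSystem fs = FileSystem.get(
                new URI("hdfs://quickstart.cloudera:8020"),
                new Configuration(),
                "cloudera");
        fs.copyFromLocalFile(new Path(args[0]), new Path(args[1]));
        fs.close();
    }
}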
pom.xml:
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>
    <groupId>connect_hdfs</groupId>
    <artifactId>connect_hdfs</artifactId>
    <version>0.0.1-SNAPSHOT</version>
    <packaging>jar</packaging>

    <dependencies>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-common</artifactId>
            <version>2.7.0</version>
            <scope>provided</scope>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-hdfs</artifactId>
            <version>2.7.0</version>
        </dependency>
        <dependency>
            <groupId>commons-io</groupId>
            <artifactId>commons-io</artifactId>
            <version>2.6</version>
        </dependency>
        <dependency>
            <groupId>jdk.tools</groupId>
            <artifactId>jdk.tools</artifactId>
            <version>jdk1.7.0_67</version>
            <scope>system</scope>
            <systemPath>C:/Program Files/Java/jdk1.7.0_67/lib/tools.jar</systemPath>
        </dependency>
    </dependencies>

    <build>
        <sourceDirectory>src</sourceDirectory>
        <plugins>
            <plugin>
                <artifactId>maven-compiler-plugin</artifactId>
                <version>3.5.1</version>
                <configuration>
                    <source>1.7</source>
                    <target>1.7</target>
                </configuration>
            </plugin>
        </plugins>
    </build>
</project>
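Note that hadoop-common is declared with provided scope above, so a jar built with a plain mvn package will not run standalone. One common way to produce a self-contained jar is the maven-shade-plugin; a hedged sketch follows (the plugin version and the Main class name are assumptions, and hadoop-common's scope would need to change from provided):

<plugin>
    <groupId>org.apache.maven.plugins</groupId>
    <artifactId>maven-shade-plugin</artifactId>
    <version>2.4.3</version>
    <executions>
        <execution>
            <phase>package</phase>
            <goals>
                <goal>shade</goal>
            </goals>
            <configuration>
                <transformers>
                    <transformer implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
                        <mainClass>Main</mainClass>
                    </transformer>
                </transformers>
            </configuration>
        </execution>
    </executions>
</plugin>

Shading merges the Hadoop jars' META-INF service files, which is exactly why the Main class above pins fs.hdfs.impl and fs.file.impl explicitly.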