I have a requirement: the MapReduce code should read the local file system on each node. The job will run against HDFS, and I cannot change Hadoop's FileSystem property in the xml configuration files.
I tried the following approaches, but none of them gave me a result.
Approach 1
Configuration config = new Configuration();
config.set("fs.defaultFS", "file:///");
FileSystem localFileSystem = FileSystem.get(config);
BufferedReader bufferRedaer = new BufferedReader(new InputStreamReader(localFileSystem.open(new Path("/user/input/localFile"))));
Approach 2
Configuration config = new Configuration();
LocalFileSystem localFileSystem = FileSystem.getLocal(config);
BufferedReader bufferRedaer = new BufferedReader(new InputStreamReader(localFileSystem.open(new Path("/user/input/localFile"))));
Approach 3
Configuration config = new Configuration();
config.set("fs.defaultFS", "file:///");
LocalFileSystem localFileSystem = FileSystem.getLocal(config);
BufferedReader bufferRedaer = new BufferedReader(new InputStreamReader(localFileSystem.open(new Path("/user/input/localFile"))));
Approach 4
Configuration config = new Configuration();
LocalFileSystem localFileSystem = FileSystem.getLocal(config);
BufferedReader bufferRedaer = new BufferedReader(new InputStreamReader(localFileSystem.getRaw().open(new Path("/user/input/localFile"))));
This did not work either: Reading HDFS and local files in Java
Every one of them fails with the error: no such file.
Error stack trace
attempt_201406050021_0018_m_000000_2: java.io.FileNotFoundException: File /home/cloudera/sftp/id_rsa does not exist
attempt_201406050021_0018_m_000000_2: at org.apache.hadoop.fs.RawLocalFileSystem.getFileStatus(RawLocalFileSystem.java:468)
attempt_201406050021_0018_m_000000_2: at org.apache.hadoop.fs.FilterFileSystem.getFileStatus(FilterFileSystem.java:380)
attempt_201406050021_0018_m_000000_2: at org.apache.hadoop.fs.FileUtil.copy(FileUtil.java:231)
attempt_201406050021_0018_m_000000_2: at org.apache.hadoop.fs.FileUtil.copy(FileUtil.java:183)
attempt_201406050021_0018_m_000000_2: at org.apache.hadoop.fs.LocalFileSystem.copyFromLocalFile(LocalFileSystem.java:81)
attempt_201406050021_0018_m_000000_2: at org.apache.hadoop.fs.FileSystem.copyFromLocalFile(FileSystem.java:1934)
attempt_201406050021_0018_m_000000_2: at com.skanda.ecomm.sftp.FTPMapper.configure(FTPMapper.java:91)
I am hoping for a working solution here. Please let me know where I am going wrong.
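From the stack trace, the failure comes from RawLocalFileSystem.getFileStatus during copyFromLocalFile in FTPMapper.configure, i.e. the task attempt cannot see /home/cloudera/sftp/id_rsa on the local disk of the node that ran it. A quick way to confirm this (a debugging sketch of my own; the class name is made up, and plain java.io.File deliberately bypasses the Hadoop FileSystem API, so fs.defaultFS plays no role):

import java.io.File;
import java.net.InetAddress;

// Debugging sketch only: run on a worker node (or from inside configure()) to check whether the
// key file actually exists on that node's local disk.
public class LocalKeyFileCheck {
    public static void main(String[] args) throws Exception {
        File keyFile = new File("/home/cloudera/sftp/id_rsa");
        System.out.println(InetAddress.getLocalHost().getHostName()
                + " sees " + keyFile.getAbsolutePath() + ": " + keyFile.exists());
    }
}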
Main class (driver class)
/*
* @SFTPClient.java @May 20, 2014
*
*
*/
package com.skanda.ecomm.sftp;
import java.net.URI;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.filecache.DistributedCache;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
/**
*
* <p>
* SFTPClient Class
* </p>
*
* @author skanda
* @version 1.0
*
*/
public class SFTPClient extends Configured implements Tool {
public int run(String[] args) throws Exception {
Configuration config = getConf();
String inputPath = config.get(ApplicationConstants.INPUT_PATH);
String outputPath = config.get(ApplicationConstants.OUTPUT_PATH);
String configPath = config.get(ApplicationConstants.CONFIG_PATH);
int reducers = Integer.parseInt(config.get(ApplicationConstants.REDUCERS));
if(outputPath == null || inputPath == null || configPath == null) {
throw new Exception("Usage: \n" + "-D configPath=<configPath> -D inputPath=<inputPath> -D reducers=<reducers" +
"-D outputPath=<path>");
}
JobConf conf = new JobConf(SFTPClient.class);
conf.setJobName("SFTP Injection client");
DistributedCache.addCacheFile(new URI(configPath),conf);
conf.setMapperClass(FTPMapper.class);
conf.setReducerClass(FTPReducer.class);
conf.setMapOutputKeyClass(IntWritable.class);
conf.setMapOutputValueClass(Text.class);
conf.setOutputKeyClass(IntWritable.class);
conf.setOutputValueClass(IntWritable.class);
// configuration should contain reference to your namenode
FileSystem fs = FileSystem.get(new Configuration());
fs.delete(new Path(outputPath), true); // true stands for recursively, deleting the folder you gave
conf.setStrings(ApplicationConstants.INPUT_PATH, inputPath);
conf.setStrings(ApplicationConstants.OUTPUT_PATH, outputPath);
FileInputFormat.setInputPaths(conf, new Path(inputPath));
FileOutputFormat.setOutputPath(conf, new Path(outputPath));
conf.setNumReduceTasks(reducers);
conf.setInt(ApplicationConstants.NUNBER_OF_REDUCERS, reducers);
JobClient.runJob(conf);
return 0;
}
public static void main(String[] args) throws Exception {
int exitCode = ToolRunner.run(new SFTPClient(), args);
System.exit(exitCode);
}
}
Mapper
/*
* @FTPMapper.java @May 20, 2014
*
*
*/
package com.skanda.ecomm.sftp;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.InetAddress;
import java.net.URI;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.filecache.DistributedCache;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocalFileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;
import com.ftp.mapreduce.CommonUtility;
import com.ftp.mapreduce.RetrieveFileNames;
import com.jcraft.jsch.hm.Channel;
/**
*
* <p>
* FTP Mapper Class
* </p>
*
* @author skanda
* @version 1.0
*
*/
@SuppressWarnings("unused")
public class FTPMapper extends MapReduceBase implements Mapper<LongWritable, Text, IntWritable, Text> {
private URI[] localFiles;
private String userName;
private String hostName;
private String folderPath;
private int reducers;
private byte[] pvtKey;
private String fileName;
private String startDate;
private String endDate;
private String sshKeyPath;
private String password;
public void configure(JobConf job) {
Properties properties = new Properties();
try {
localFiles = DistributedCache.getCacheFiles(job);
if (localFiles != null && localFiles.length == 1) {
Configuration conf = new Configuration();
FileSystem fileSystem = FileSystem.get(localFiles[0], conf);
BufferedReader bufferRedaer=new BufferedReader(new InputStreamReader(fileSystem.open(new Path(localFiles[0]))));
properties.load(bufferRedaer);
userName = properties.getProperty(ApplicationConstants.USER_NAME);
reducers = job.getInt(ApplicationConstants.NUNBER_OF_REDUCERS, 30);
hostName = properties.getProperty(ApplicationConstants.SFTP_SERVER_HOST);
folderPath = properties.getProperty(ApplicationConstants.HOSTFILE_DIRECTORY_PATH);
fileName = properties.getProperty(ApplicationConstants.FILE_NAME_PATTERN);
startDate = properties.getProperty(ApplicationConstants.FILE_START_DATE);
endDate = properties.getProperty(ApplicationConstants.FILE_END_DATE);
sshKeyPath = properties.getProperty(ApplicationConstants.SSH_KEY_PATH);
password = properties.getProperty(ApplicationConstants.PASSWORD);
System.out.println("--------------------------------------------------");
/*FileSystem fs = FileSystem.getLocal(conf);
//Path inputPath = fs.makeQualified(new Path(sshKeyPath));
String inputPath = new Path("file:///home/cloudera/"+sshKeyPath).toUri().getPath();
fs.copyFromLocalFile(new Path(inputPath), new Path("outputSFTP/idFile") );*/
try{
Configuration conf1 = new Configuration();
Path pt = new Path("file:///home/cloudera/.ssh/id_rsa");
FileSystem fs = FileSystem.get( new URI("file:///home/cloudera/.ssh/id_rsa"), conf);
LocalFileSystem localFileSystem = fs.getLocal(conf1);
BufferedReader bufferRedaer1 = new BufferedReader(new InputStreamReader(localFileSystem.open(pt)));
String str = null;
while ((str = bufferRedaer1.readLine())!= null)
{
System.out.println("-----------");
System.out.println(str);
}
}catch(Exception e){
System.out.println("failed again");
String computername=InetAddress.getLocalHost().getHostName();
System.out.println(computername);
e.printStackTrace();
}
System.out.println("--------------------------------------------------");
Configuration config = new Configuration();
config.set("fs.defaultFS", "file:////");
LocalFileSystem localFileSystem = FileSystem.getLocal(config);
bufferRedaer = new BufferedReader(new InputStreamReader(localFileSystem.open(new Path(sshKeyPath))));
/*Configuration config = new Configuration();
//config.set("fs.defaultFS", "file:///home/cloudera/.ssh/id_rsa");
LocalFileSystem fileSystm = FileSystem.getLocal(config);
Path path = fileSystm.makeQualified(new Path("/home/cloudera/.ssh/id_rsa"));*/
//FileInputFormat.setInputPaths(job, path);
//bufferRedaer = new BufferedReader(new InputStreamReader(fileSystem.open(path)));
String key = "";
try {
String line = "";
while ((line = bufferRedaer.readLine()) != null) {
key += line + "\n";
}
pvtKey = key.getBytes();
} catch(Exception e){
e.printStackTrace();
} finally {
//fileSystem.close();
//bufferRedaer.close();
}
}
} catch (IOException e) {
e.printStackTrace();
}
}
public void map(LongWritable key, Text value, OutputCollector<IntWritable, Text> output, Reporter reporter)
throws IOException {
List<String> filterFileNamesList = new ArrayList<String>();
Channel channel = CommonUtility.connectSFTP(userName, hostName, pvtKey);
Map<String, String> fileNamesMap = CommonUtility.getFileNames(channel, folderPath);
List<String> filterFileNameList_output = RetrieveFileNames.FILTER_BY_NAME.retrieveFileNames(fileNamesMap, filterFileNamesList,
fileName, startDate, endDate);
for (int i = 0; i < filterFileNameList_output.size(); i++) {
int keyGroup = i % reducers;
output.collect(new IntWritable(keyGroup), new Text(filterFileNameList_output.get(i)));
}
}
}
Answer (score: 2)
This code worked for me when the program runs on HDFS and my txt file is at this location:
/home/Rishi/Documents/RishiFile/r.txt
import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocalFileSystem;
import org.apache.hadoop.fs.Path;

public class HadoopRead {
    public static void main(String[] args) {
        try {
            Configuration conf = new Configuration();
            Path pt = new Path("/home/Rishi/Documents/RishiFile/r.txt");
            FileSystem fs = FileSystem.get(new URI("/home/Rishi/Documents/RishiFile"), conf);
            // getLocal() always returns the local file system, regardless of fs.defaultFS
            LocalFileSystem localFileSystem = fs.getLocal(conf);
            BufferedReader bufferRedaer = new BufferedReader(new InputStreamReader(localFileSystem.open(pt)));
            String str = null;
            while ((str = bufferRedaer.readLine()) != null) {
                System.out.println("-----------");
                System.out.println(str);
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
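The snippet above assumes the file exists at the same local path on every node that can run the task; the FileNotFoundException in the question is exactly what happens when that assumption fails on some node. If the path cannot be guaranteed everywhere, one option (a sketch only, reusing the DistributedCache calls already present in the question's driver and FTPMapper; the HDFS path /user/cloudera/sftp/id_rsa is made up) is to ship the file with the job and read the cached copy back on the task node:

import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.net.URI;

import org.apache.hadoop.filecache.DistributedCache;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.JobConf;

public class CachedKeySketch {

    // Driver side: register the file so the framework copies it to every task node.
    static void addKeyToCache(JobConf conf) throws Exception {
        DistributedCache.addCacheFile(new URI("/user/cloudera/sftp/id_rsa"), conf);
    }

    // Mapper side, e.g. inside configure(JobConf): read the cached copy back on the task node.
    static String readCachedKey(JobConf job) throws Exception {
        URI[] cached = DistributedCache.getCacheFiles(job);
        FileSystem fs = FileSystem.get(cached[0], job);
        BufferedReader reader = new BufferedReader(new InputStreamReader(fs.open(new Path(cached[0]))));
        StringBuilder key = new StringBuilder();
        try {
            String line;
            while ((line = reader.readLine()) != null) {
                key.append(line).append("\n");
            }
        } finally {
            reader.close();
        }
        return key.toString();
    }
}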
Word count example that reads a local file while the job runs on HDFS.
My main class
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
public class FileDriver extends Configured implements Tool {

    public static void main(String[] args) {
        try {
            ToolRunner.run(new Configuration(), new FileDriver(), args);
            System.exit(0);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    public int run(String[] arg0) throws Exception {
        Configuration conf = new Configuration();
        Path pt = new Path("file:///home/winoria/Documents/Ri/r");

        Job job = new Job(conf, "new Job");
        job.setJarByClass(FileDriver.class);
        job.setMapperClass(FileMapper.class);
        job.setReducerClass(FileReducer.class);

        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);

        FileInputFormat.setInputPaths(job, pt);
        FileSystem.get(job.getConfiguration()).delete(new Path("Output2"), true);
        FileOutputFormat.setOutputPath(job, new Path("Output2"));

        job.waitForCompletion(true);
        return 0;
    }
}
Mapper class:
import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class FileMapper extends Mapper<LongWritable, Text, Text, Text> {
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        String str[] = value.toString().split(" ");
        for (int i = 0; i < str.length; i++) {
            context.write(new Text(str[i]), new Text());
        }
    }
}
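If the local read from the question had to be done inside this new-API mapper, the natural place is setup(Context), the counterpart of configure(JobConf) in the old API. A minimal sketch, assuming the file exists at that path on every task node (the class name and path are illustrative, not taken from the answer's code):

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;

import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocalFileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class LocalReadMapper extends Mapper<LongWritable, Text, Text, Text> {

    private String keyContents = "";

    @Override
    protected void setup(Context context) throws IOException, InterruptedException {
        // getLocal() always returns the local file system, independent of fs.defaultFS.
        LocalFileSystem localFs = FileSystem.getLocal(context.getConfiguration());
        BufferedReader reader = new BufferedReader(
                new InputStreamReader(localFs.open(new Path("/home/cloudera/.ssh/id_rsa"))));
        try {
            StringBuilder sb = new StringBuilder();
            String line;
            while ((line = reader.readLine()) != null) {
                sb.append(line).append("\n");
            }
            keyContents = sb.toString();
        } finally {
            reader.close();
        }
    }
}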
Reducer class:
import java.io.IOException;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

public class FileReducer extends Reducer<Text, Text, Text, Text> {
    protected void reduce(Text key, Iterable<Text> value, Context context) throws IOException, InterruptedException {
        int count = 0;
        for (Text text : value) {
            count++;
        }
        context.write(key, new Text(count + ""));
    }
}