#!/usr/bin/env bash
# Echo back the five positional arguments so the user can confirm what was
# passed in before any HDFS work starts.
printf 'textFile :%s\n' "$1"   # local text file to process
printf 'mapper : %s\n' "$2"    # mapper script (e.g. mapper.py)
printf 'reducer: %s\n' "$3"    # reducer script (e.g. reducer.py)
printf 'inputDir :%s\n' "$4"   # HDFS input directory name
printf 'outputDir: %s\n' "$5"  # HDFS output directory name
# Show the HDFS home directory, then refresh the mapper/reducer scripts there:
# remove any stale copies and upload the current ones.
# NOTE(review): '~' is expanded by the *local* shell to $HOME and then used as
# an absolute HDFS path — verify it matches the HDFS home directory layout.
hdfs dfs -ls ~
# -f: do not report an error when the file does not exist yet (first run)
hdfs dfs -rm -f ~/"$2"
hdfs dfs -rm -f ~/"$3"
hdfs dfs -copyFromLocal "$2" ~ # copies mapper.py file from argument to hdfs dir
hdfs dfs -copyFromLocal "$3" ~ # copies reducer.py file from argument to hdfs dir
# Remove any previous HDFS output directory: Hadoop streaming refuses to run
# when the output directory already exists.
# Test the command's exit status directly instead of the '[ $? == 0 ]'
# anti-pattern ('==' inside '[' is also non-POSIX).
if hdfs dfs -test -d ~/"$5"; then
  hdfs dfs -rm -r ~/"$5"
else
  echo "Output file doesn't exist and will be created when hadoop runs"
fi
# Recreate the HDFS input directory so every run starts from a clean slate.
# The command is tested directly (no '$?' juggling), and the 'mkdir' that was
# duplicated in both branches is hoisted below the conditional.
if hdfs dfs -test -d ~/"$4"; then
  echo "Hadoop input dir already exists; deleting it now and creating a new one..."
  hdfs dfs -rm -r ~/"$4"
else
  echo "Input dir doesn't exist; it will be created now"
fi
hdfs dfs -mkdir ~/"$4" # makes an input dir for text file to be put in
hdfs dfs -copyFromLocal /home/hduser/"$1" ~/"$4" # sends textfile from local to hdfs folder

# Locate the streaming jar via a glob instead of hard-coding the Hadoop
# version (the original pinned 2.6.2 and broke on any other install).
jars=(/usr/local/hadoop/share/hadoop/tools/lib/hadoop-streaming-*.jar)
streaming_jar=${jars[0]}

# Runs the hadoop mapreduce program with given parameters.
# Scripts shipped with -file are placed in each task's working directory, so
# -mapper/-reducer should reference them by bare name, not by an absolute
# local path. (NOTE(review): -file is deprecated in newer Hadoop in favor of
# the generic -files option — confirm against the installed version.)
hadoop jar "$streaming_jar" \
-input /home/hduser/"$4"/* \
-output /home/hduser/"$5" \
-file /home/hduser/"$2" \
-mapper "$2" \
-file /home/hduser/"$3" \
-reducer "$3"
我想避免每次测试 mapper 和 reducer 文件时都要串联所有命令来运行简单的 MapReduce 作业,所以我写了这个脚本。我是 shell 脚本的新手。我附上了屏幕截图。
答案 0(得分:0):
你应该纠正两个明显的细节: