This is my first post to Stack Overflow, so I apologize if I'm doing this wrong.
I recently set up a new Hadoop cluster, and this is my first time working with Hadoop 2 and YARN. I currently get the following error when I submit a job:
java.io.IOException: Cannot initialize Cluster. Please check your configuration for mapreduce.framework.name and the correspond server addresses.
at org.apache.hadoop.mapreduce.Cluster.initialize(Cluster.java:120)
at org.apache.hadoop.mapreduce.Cluster.<init>(Cluster.java:82)
at org.apache.hadoop.mapreduce.Cluster.<init>(Cluster.java:75)
at org.apache.hadoop.mapreduce.Job$9.run(Job.java:1255)
at org.apache.hadoop.mapreduce.Job$9.run(Job.java:1251)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:415)
at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1614)
at org.apache.hadoop.mapreduce.Job.connect(Job.java:1250)
at org.apache.hadoop.mapreduce.Job.submit(Job.java:1279)
at org.apache.hadoop.mapreduce.Job.waitForCompletion(Job.java:1303)
Here are my configuration files:
mapred-site.xml
<configuration>
<!-- Site specific YARN configuration properties -->
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
</configuration>
hdfs-site.xml
<configuration>
<property>
<name>dfs.name.dir</name>
<value>/temp1/nn,/temp2/nn</value>
</property>
<property>
<name>dfs.data.dir</name>
<value>/temp1/dn,/temp2/dn</value>
</property>
<property>
<name>fs.checkpoint.dir</name>
<value>/temp1/snn</value>
</property>
<property>
<name>dfs.permissions.supergroup</name>
<value>hrdbms</value>
</property>
<property>
<name>dfs.block.size</name>
<value>268435456</value>
</property>
</configuration>
yarn-site.xml
<configuration>
<!-- Site specific YARN configuration properties -->
<property>
<name>yarn.resourcemanager.hostname</name>
<value>172.31.20.99</value>
</property>
<property>
<name>yarn.nodemanager.local-dirs</name>
<value>/temp1/y1,/temp2/y1</value>
</property>
<property>
<name>yarn.nodemanager.log-dirs</name>
<value>/temp1/y2,/temp2/y2</value>
</property>
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
</configuration>
Here is my Java code:
// Build the configuration explicitly and point it at the cluster's config files
Configuration conf = new Configuration();
conf.set("fs.hdfs.impl", org.apache.hadoop.hdfs.DistributedFileSystem.class.getName());
conf.set("fs.file.impl", org.apache.hadoop.fs.LocalFileSystem.class.getName());
conf.setBoolean("mapred.compress.map.output", true);
conf.addResource(new org.apache.hadoop.fs.Path("/usr/local/hadoop-2.5.1/etc/hadoop/core-site.xml"));
conf.addResource(new org.apache.hadoop.fs.Path("/usr/local/hadoop-2.5.1/etc/hadoop/hdfs-site.xml"));
conf.addResource(new org.apache.hadoop.fs.Path("/usr/local/hadoop-2.5.1/etc/hadoop/yarn-site.xml"));
conf.set("mapreduce.framework.name", "yarn");
conf.setClass("mapred.map.output.compression.codec", org.apache.hadoop.io.compress.SnappyCodec.class, CompressionCodec.class);
// Configure the job: mapper/reducer classes, key/value types, reducer count
Job job = new Job(conf);
job.setJarByClass(LoadMapper.class);
job.setJobName("Load " + schema + "." + table);
job.setMapperClass(LoadMapper.class);
job.setReducerClass(LoadReducer.class);
job.setOutputKeyClass(IntWritable.class);
job.setOutputValueClass(ALOWritable.class);
job.setMapOutputKeyClass(IntWritable.class);
job.setMapOutputValueClass(ALOWritable.class);
job.setNumReduceTasks(workerNodes.size());
job.setOutputFormatClass(LoadOutputFormat.class);
job.setReduceSpeculativeExecution(false);
job.setMapSpeculativeExecution(false);
// Derive the input path from the glob (drop its first six characters) and submit
String glob2 = glob.substring(6);
FileInputFormat.addInputPath(job, new org.apache.hadoop.fs.Path(glob2));
HRDBMSWorker.logger.debug("Submitting MR job");
boolean allOK = job.waitForCompletion(true);
Here are all of the environment variables in place when the JVM is launched:
HADOOP_DATANODE_OPTS=-Dhadoop.security.logger=ERROR,RFAS
HOSTNAME=ip-172-31-20-103
HADOOP_IDENT_STRING=hrdbms
SHELL=/bin/bash
TERM=xterm
HADOOP_HOME=/usr/local/hadoop-2.5.1
HISTSIZE=1000
HADOOP_PID_DIR=
YARN_HOME=/usr/local/hadoop-2.5.1
USER=hrdbms
LS_COLORS=rs=0:di=01;34:ln=01;36:mh=00:pi=40;33:so=01;35:do=01;35:bd=40;33;01:cd=40;33;01:or=40;31;01:mi=01;05;37;41:su=37;41:sg=30;43:ca=30;41:tw=30;42:ow=34;42:st=37;44:ex=01;32:*.tar=01;31:*.tgz=01;31:*.arj=01;31:*.taz=01;31:*.lzh=01;31:*.lzma=01;31:*.tlz=01;31:*.txz=01;31:*.zip=01;31:*.z=01;31:*.Z=01;31:*.dz=01;31:*.gz=01;31:*.lz=01;31:*.xz=01;31:*.bz2=01;31:*.tbz=01;31:*.tbz2=01;31:*.bz=01;31:*.tz=01;31:*.deb=01;31:*.rpm=01;31:*.jar=01;31:*.rar=01;31:*.ace=01;31:*.zoo=01;31:*.cpio=01;31:*.7z=01;31:*.rz=01;31:*.jpg=01;35:*.jpeg=01;35:*.gif=01;35:*.bmp=01;35:*.pbm=01;35:*.pgm=01;35:*.ppm=01;35:*.tga=01;35:*.xbm=01;35:*.xpm=01;35:*.tif=01;35:*.tiff=01;35:*.png=01;35:*.svg=01;35:*.svgz=01;35:*.mng=01;35:*.pcx=01;35:*.mov=01;35:*.mpg=01;35:*.mpeg=01;35:*.m2v=01;35:*.mkv=01;35:*.ogm=01;35:*.mp4=01;35:*.m4v=01;35:*.mp4v=01;35:*.vob=01;35:*.qt=01;35:*.nuv=01;35:*.wmv=01;35:*.asf=01;35:*.rm=01;35:*.rmvb=01;35:*.flc=01;35:*.avi=01;35:*.fli=01;35:*.flv=01;35:*.gl=01;35:*.dl=01;35:*.xcf=01;35:*.xwd=01;35:*.yuv=01;35:*.cgm=01;35:*.emf=01;35:*.axv=01;35:*.anx=01;35:*.ogv=01;35:*.ogx=01;35:*.aac=01;36:*.au=01;36:*.flac=01;36:*.mid=01;36:*.midi=01;36:*.mka=01;36:*.mp3=01;36:*.mpc=01;36:*.ogg=01;36:*.ra=01;36:*.wav=01;36:*.axa=01;36:*.oga=01;36:*.spx=01;36:*.xspf=01;36:
HADOOP_SECURE_DN_PID_DIR=
HADOOP_SECURE_DN_LOG_DIR=/
MAIL=/var/spool/mail/hrdbms
PATH=/usr/local/bin:/bin:/usr/bin:/usr/local/sbin:/usr/sbin:/sbin:/home/hrdbms/bin
HADOOP_HDFS_HOME=/usr/local/hadoop-2.5.1
HADOOP_CLIENT_OPTS=-Xmx512m
HADOOP_COMMON_HOME=/usr/local/hadoop-2.5.1
PWD=/home/hrdbms
JAVA_HOME=/usr/lib/jvm/java-1.7.0-openjdk-1.7.0.55.x86_64/jre
HADOOP_CLASSPATH=/home/hrdbms/HRDBMS.jar:/contrib/capacity-scheduler/*.jar
HADOOP_CONF_DIR=/etc/hadoop
LANG=en_US.UTF-8
HADOOP_PORTMAP_OPTS=-Xmx512m
HADOOP_OPTS= -Djava.net.preferIPv4Stack=true
HADOOP_SECONDARYNAMENODE_OPTS=-Dhadoop.security.logger=INFO,RFAS -Dhdfs.audit.logger=INFO,NullAppender
HISTCONTROL=ignoredups
SHLVL=1
HOME=/home/hrdbms
YARN_CONF_DIR=/etc/hadoop
HADOOP_SECURE_DN_USER=
HADOOP_NAMENODE_OPTS=-Dhadoop.security.logger=INFO,RFAS -Dhdfs.audit.logger=INFO,NullAppender
HADOOP_MAPRED_HOME=/usr/local/hadoop-2.5.1
LOGNAME=hrdbms
HADOOP_NFS3_OPTS=
LESSOPEN=|/usr/bin/lesspipe.sh %s
HADOOP_YARN_USER=hrdbms
G_BROKEN_FILENAMES=1
_=/bin/env
And here is a list of all the jars on the client classpath:
activation-1.1.jar
antlr-4.2.1-complete.jar
aopalliance-1.0.jar
apacheds-i18n-2.0.0-M15.jar
apacheds-kerberos-codec-2.0.0-M15.jar
api-asn1-api-1.0.0-M20.jar
api-util-1.0.0-M20.jar
asm-3.2.jar
avro-1.7.4.jar
commons-beanutils-1.7.0.jar
commons-beanutils-core-1.8.0.jar
commons-cli-1.2.jar
commons-codec-1.3.jar
commons-codec-1.4.jar
commons-collections-3.2.1.jar
commons-compress-1.4.1.jar
commons-configuration-1.6.jar
commons-daemon-1.0.13.jar
commons-digester-1.8.jar
commons-el-1.0.jar
commons-httpclient-3.1.jar
commons-io-2.4.jar
commons-lang-2.6.jar
commons-logging-1.1.3.jar
commons-math3-3.1.1.jar
commons-net-3.1.jar
guava-11.0.2.jar
guice-3.0.jar
guice-servlet-3.0.jar
hadoop-annotations-2.5.1.jar
hadoop-archives-2.5.1.jar
hadoop-auth-2.5.1.jar
hadoop-common-2.5.1-tests.jar
hadoop-common-2.5.1.jar
hadoop-datajoin-2.5.1.jar
hadoop-distcp-2.5.1.jar
hadoop-extras-2.5.1.jar
hadoop-gridmix-2.5.1.jar
hadoop-hdfs-2.5.1-tests.jar
hadoop-hdfs-2.5.1.jar
hadoop-hdfs-nfs-2.5.1.jar
hadoop-mapreduce-client-app-2.5.1.jar
hadoop-mapreduce-client-common-2.5.1.jar
hadoop-mapreduce-client-core-2.5.1.jar
hadoop-mapreduce-client-hs-2.5.1.jar
hadoop-mapreduce-client-hs-plugins-2.5.1.jar
hadoop-mapreduce-client-jobclient-2.5.1-tests.jar
hadoop-mapreduce-client-jobclient-2.5.1.jar
hadoop-mapreduce-client-shuffle-2.5.1.jar
hadoop-mapreduce-examples-2.5.1.jar
hadoop-nfs-2.5.1.jar
hadoop-openstack-2.5.1.jar
hadoop-rumen-2.5.1.jar
hadoop-sls-2.5.1.jar
hadoop-streaming-2.5.1.jar
hadoop-yarn-api-2.5.1.jar
hadoop-yarn-applications-distributedshell-2.5.1.jar
hadoop-yarn-applications-unmanaged-am-launcher-2.5.1.jar
hadoop-yarn-client-2.5.1.jar
hadoop-yarn-common-2.5.1.jar
hadoop-yarn-server-applicationhistoryservice-2.5.1.jar
hadoop-yarn-server-common-2.5.1.jar
hadoop-yarn-server-nodemanager-2.5.1.jar
hadoop-yarn-server-resourcemanager-2.5.1.jar
hadoop-yarn-server-tests-2.5.1.jar
hadoop-yarn-server-web-proxy-2.5.1.jar
hamcrest-core-1.3.jar
httpclient-4.2.5.jar
httpcore-4.2.5.jar
jackson-core-asl-1.9.13.jar
jackson-jaxrs-1.9.13.jar
jackson-mapper-asl-1.9.13.jar
jackson-xc-1.9.13.jar
jasper-compiler-5.5.23.jar
jasper-runtime-5.5.23.jar
java-xmlbuilder-0.4.jar
javax.inject-1.jar
jaxb-api-2.2.2.jar
jaxb-impl-2.2.3-1.jar
jersey-client-1.9.jar
jersey-core-1.9.jar
jersey-guice-1.9.jar
jersey-json-1.9.jar
jersey-server-1.9.jar
jets3t-0.9.0.jar
jettison-1.1.jar
jetty-6.1.26.jar
jetty-util-6.1.26.jar
jline-0.9.94.jar
jsch-0.1.50.jar
jsp-api-2.1.jar
jsr305-1.3.9.jar
junit-4.11.jar
leveldbjni-all-1.8.jar
log4j-1.2.17.jar
metrics-core-3.0.0.jar
mockito-all-1.8.5.jar
netty-3.6.2.Final.jar
paranamer-2.3.jar
preflight-app-1.8.7.jar
protobuf-java-2.5.0.jar
servlet-api-2.5.jar
slf4j-api-1.7.5.jar
slf4j-log4j12-1.7.5.jar
snappy-java-1.0.4.1.jar
stax-api-1.0-2.jar
xmlenc-0.52.jar
zookeeper-3.4.6.jar
Please help! Thanks!
Edit: I just found these debug log messages:
2014-10-27 19:31:21,789 DEBUG Cluster: Trying ClientProtocolProvider : org.apache.hadoop.mapred.LocalClientProtocolProvider
2014-10-27 19:31:21,789 DEBUG Cluster: Cannot pick org.apache.hadoop.mapred.LocalClientProtocolProvider as the ClientProtocolProvider - returned null protocol
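These messages come from MapReduce's Cluster class, which discovers ClientProtocolProvider implementations through java.util.ServiceLoader. Only providers registered in a META-INF/services file on the classpath are ever tried, and LocalClientProtocolProvider returns a null protocol whenever mapreduce.framework.name is set to yarn. To see which registrations the client JVM can actually find, I can enumerate the service files on the classpath; a minimal sketch (the class name and variable names are just illustrative, the resource path is the real service interface):

import java.io.IOException;
import java.net.URL;
import java.util.Enumeration;

public class ListProviders {
    public static void main(String[] args) throws IOException {
        // Every copy of this service file on the classpath contributes providers;
        // if none of them lists YarnClientProtocolProvider, a YARN job cannot be submitted.
        String res = "META-INF/services/org.apache.hadoop.mapreduce.protocol.ClientProtocolProvider";
        Enumeration<URL> urls = ListProviders.class.getClassLoader().getResources(res);
        while (urls.hasMoreElements()) {
            System.out.println(urls.nextElement());
        }
    }
}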
Answer 0 (score: 3)
I ran into a similar problem today. In my case I was building an uber jar, and some dependency (I have not yet found the culprit) was bringing in a META-INF/services/org.apache.hadoop.mapreduce.protocol.ClientProtocolProvider file with the contents:
org.apache.hadoop.mapred.LocalClientProtocolProvider
I provided my own version in the project (i.e. put it on the classpath) containing:
org.apache.hadoop.mapred.YarnClientProtocolProvider
and the correct one gets picked up. I suspect you are seeing something similar. To fix it, create the file described above and put it on the classpath. If I find the culprit jar, I will update this answer.
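If the uber jar is built with Maven's shade plugin, an alternative to hand-maintaining that file is to merge the META-INF/services entries from all dependencies instead of letting one jar overwrite another. A sketch, assuming a maven-shade-plugin build (adjust to your own pom):

<plugin>
  <groupId>org.apache.maven.plugins</groupId>
  <artifactId>maven-shade-plugin</artifactId>
  <executions>
    <execution>
      <phase>package</phase>
      <goals><goal>shade</goal></goals>
      <configuration>
        <transformers>
          <!-- Concatenates same-named META-INF/services files from all dependencies,
               so both the local and the YARN provider registrations survive -->
          <transformer implementation="org.apache.maven.plugins.shade.resource.ServicesResourceTransformer"/>
        </transformers>
      </configuration>
    </execution>
  </executions>
</plugin>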
Answer 1 (score: 0)
Turn off security in the cluster (if it is genuinely not a concern in your environment), i.e. disable this HDFS setting:
<property>
<name>dfs.permissions</name>
<value>false</value>
</property>
It is enabled by default. In Cloudera Manager, it can be reached from the Configuration panel.
Answer 2 (score: 0)
I faced the same problem trying to run an MR job from Eclipse with the latest CDH 5.5 (Hadoop 2.6) distribution.
I am not sure exactly what the problem was; it may well be the class-loading issue that @timrobertson100 mentioned. In my case, though, I was able to get past it by adding all the jars from the following paths to the Eclipse project's classpath:
.../hadoop-2.6.0-cdh5.5.1/share/hadoop/common/hadoop-common-2.6.0-cdh5.5.1.jar
.../hadoop-2.6.0-cdh5.5.1/share/hadoop/common/lib/*
.../hadoop-2.6.0-cdh5.5.1/share/hadoop/mapreduce2/*
.../hadoop-2.6.0-cdh5.5.1/share/hadoop/yarn/*
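As a cross-check, running "hadoop classpath" on a machine with a Hadoop installation prints the classpath that the stock launcher scripts would use, which is a convenient reference for which share/hadoop/* directories a client-side project needs to see.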