我是Spark和SparkR的新手。对于Hadoop,只有一个名为winutils/bin/winutils.exe
的文件。
正在运行的系统环境如下:
我可以在本地计算机上读取数据,但是部署到集群的工作节点后就无法读取了。
有人能帮助我吗?
# --- Local-mode SparkR session -----------------------------------------------
# Works because the driver and the "workers" all run on this one machine, so
# local Windows paths are visible to every task.
Sys.setenv(SPARK_HOME = "D:/SPARK2")
library(SparkR)

# BUG FIX: `sparkPackages` is a top-level argument of sparkR.session(), not a
# Spark config key. Putting it inside `sparkConfig` is silently ignored and
# the avro package is never loaded.
sparkR.session(
  master = "local[*]",
  enableHiveSupport = FALSE,
  sparkConfig = list(
    spark.driver.memory = "4g",
    spark.sql.warehouse.dir = "d:/winutils/bin"
  ),
  sparkPackages = "com.databricks:spark-avro_2.11:3.0.1"
)
# Console output (not code, kept for reference):
# Java ref type org.apache.spark.sql.SparkSession id 1

# file.path() is the idiomatic way to join path components
# (replaces paste(..., sep = "/")).
multiPeople <- read.json(c(
  file.path(getwd(), "people.json"),
  "D:/RwizSpark_Private/people2.json"
))
rand_10m_x <- read.df(
  x = "./demo.csv",
  source = "csv",
  inferSchema = "true",
  na.strings = "NA"
)
# --- Cluster SparkR session --------------------------------------------------
# Root cause of the FileNotFoundException below: with a remote master, read
# paths are resolved on the EXECUTORS, not on the driver. A file that exists
# only on the driver's D: drive does not exist on the worker at 172.29.110.101.
# Fix: copy the files to the same path on every worker, or use shared storage
# (e.g. an HDFS URI such as "hdfs://namenode:8020/data/people2.json").
Sys.setenv(SPARK_HOME = "D:/SPARK2")
library(SparkR)

# BUG FIX: a standalone master URL needs two slashes and normally a port:
# "spark:/mymasterIP" is malformed -- use "spark://<host>:7077"
# (7077 is the Spark standalone master's default port; confirm yours).
sparkR.session(
  master = "spark://mymasterIP:7077",
  enableHiveSupport = FALSE,
  appName = "sparkRenzhi",
  sparkConfig = list(
    spark.driver.memory = "6g",
    spark.sql.warehouse.dir = "d:/winutils/bin",
    spark.executor.memory = "2g",
    spark.executor.cores = "2"
  ),
  sparkPackages = "com.databricks:spark-avro_2.11:3.0.1"
)
# Console output (not code, kept for reference):
# Java ref type org.apache.spark.sql.SparkSession id 1

# NOTE(review): these paths must be readable from every worker node, or the
# job fails with java.io.FileNotFoundException on the executors (see trace).
multiPeople <- read.json(c(
  file.path(getwd(), "people.json"),
  "D:/RwizSpark_Private/people2.json"
))
invokeJava出错(isStatic = FALSE, objId$id, methodName, ...):
org.apache.spark.SparkException: 作业因阶段失败而中止: 阶段0.0中的任务1失败4次,最近一次失败: 阶段0.0中丢失任务1.3 (TID 6, 172.29.110.101): java.io.FileNotFoundException: File file:/D:/RwizSpark_Private/people2.json 不存在
  at org.apache.hadoop.fs.RawLocalFileSystem.deprecatedGetFileStatus(RawLocalFileSystem.java:609)
  at org.apache.hadoop.fs.RawLocalFileSystem.getFileLinkStatusInternal(RawLocalFileSystem.java:822)
  at org.apache.hadoop.fs.RawLocalFileSystem.getFileStatus(RawLocalFileSystem.java:599)
  at org.apache.hadoop.fs.FilterFileSystem.getFileStatus(FilterFileSystem.java:421)
  at org.apache.hadoop.fs.ChecksumFileSystem$ChecksumFSInputChecker.&lt;init&gt;(ChecksumFileSystem.java:140)
  at org.apache.hadoop.fs.ChecksumFileSystem.open(ChecksumFileSystem.java:341)
  at org.apache.hadoop.fs.FileSystem.open(FileSystem.java:767)
  at org.apache.hadoop.mapred.LineRecordReader.&lt;init&gt;(LineRecordReader.java:109)
  at org.apache.hadoop.mapre...(堆栈被截断)
rand_10m_x <- read.df(x = "./demo.csv",source = "csv", inferSchema="true",na.strings= "NA")
invokeJava出错(isStatic = TRUE, className, methodName, ...):
org.apache.spark.SparkException: 作业因阶段失败而中止: 阶段1.0中的任务0失败4次,最近一次失败: 阶段1.0中丢失任务0.3 (TID 11, 172.29.110.101): java.io.FileNotFoundException: File file:/D:/RwizSpark_Private/demo.csv 不存在
  at org.apache.hadoop.fs.RawLocalFileSystem.deprecatedGetFileStatus(RawLocalFileSystem.java:609)
  at org.apache.hadoop.fs.RawLocalFileSystem.getFileLinkStatusInternal(RawLocalFileSystem.java:822)
  at org.apache.hadoop.fs.RawLocalFileSystem.getFileStatus(RawLocalFileSystem.java:599)
  at org.apache.hadoop.fs.FilterFileSystem.getFileStatus(FilterFileSystem.java:421)
  at org.apache.hadoop.fs.ChecksumFileSystem$ChecksumFSInputChecker.&lt;init&gt;(ChecksumFileSystem.java:140)
  at org.apache.hadoop.fs.ChecksumFileSystem.open(ChecksumFileSystem.java:341)
  at org.apache.hadoop.fs.FileSystem.open(FileSystem.java:767)
  at org.apache.hadoop.mapred.LineRecordReader.&lt;init&gt;(LineRecordReader.java:109)
  at org.apache.hadoop.mapred.T...(堆栈被截断)