我尝试通过读取CSV文件来创建DataFrame,然后打印文件中的前10条记录。以下是我在IntelliJ IDEA中所做的完整流程。
在build.sbt中添加了所需的依赖项:
name := "sample"
// Project version and the Scala release the project is compiled with.
version := "0.1"
scalaVersion := "2.11.8"

// Spark core and SQL modules, both at 2.0.0. `%%` appends the Scala binary
// suffix (_2.11 for scalaVersion 2.11.8) to each artifact name.
libraryDependencies ++= Seq(
  "org.apache.spark" %% "spark-core" % "2.0.0",
  "org.apache.spark" %% "spark-sql" % "2.0.0"
)
创建了Spark环境:
import org.apache.spark.SparkConf
import org.apache.spark.sql.SparkSession
/**
  * Shared Spark bootstrap: mix this trait in to obtain a lazily created
  * [[SparkConf]] and the [[SparkSession]] built from it.
  */
trait Context {

  /**
    * Spark configuration for a local run: application name "Learn Spark",
    * master `local[*]`, `spark.cores.max` set to "2", and an explicit SQL
    * warehouse directory.
    */
  lazy val sparkConf: SparkConf = new SparkConf()
    .setAppName("Learn Spark")
    .setMaster("local[*]")
    .set("spark.cores.max", "2")
    // Spark 2.0.0 on Windows builds its default warehouse location as
    // "file:C:/..." which Hadoop's Path rejects with
    // "URISyntaxException: Relative path in absolute URI". Supplying a
    // well-formed file URI (java.io.File#toURI yields "file:/C:/...") for the
    // same ./spark-warehouse directory avoids that failure.
    .set("spark.sql.warehouse.dir", new java.io.File("spark-warehouse").toURI.toString)

  /** Session created (or reused, if one already exists) from [[sparkConf]]. */
  lazy val sparkSession: SparkSession = SparkSession
    .builder()
    .config(sparkConf)
    .getOrCreate()
}
然后创建了DataFrame:
/**
  * Runnable example: reads the question-tags CSV into a DataFrame with columns
  * "id" and "tag", prints the first 10 rows, then prints the schema.
  */
object DataFrame_Tutorial extends App with Context {

  // Reader settings: treat the first line as a header row and let Spark
  // infer the column types from the data.
  private val csvOptions = Map("header" -> "true", "inferSchema" -> "true")

  val dfTags = sparkSession.read
    .options(csvOptions)
    .csv("C:\\Users\\Desktop\\question_tags_10k.csv")
    .toDF("id", "tag") // rename the two columns regardless of the header text

  dfTags.show(10)
  dfTags.printSchema()
}
但是在执行代码时,我既没有看到明确的错误提示,也没有得到预期的输出。请帮帮我,我怎样才能看到输出?
输入文件如下:
Id Tag
1 data
4 c#
以下是输出:
"C:\Program Files\Java\jdk1.8.0_151\bin\java" "-javaagent:C:\Program Files\JetBrains\IntelliJ IDEA Community Edition 2017.3.4\lib\idea_rt.jar=61392:C:\Program Files\JetBrains\IntelliJ IDEA Community Edition 2017.3.4\bin" -Dfile.encoding=UTF-8 -classpath "C:\Program Files\Java\jdk1.8.0_151\jre\lib\charsets.jar;C:\Program Files\Java\jdk1.8.0_151\jre\lib\deploy.jar;C:\Program Files\Java\jdk1.8.0_151\jre\lib\ext\access-bridge-64.jar;C:\Program Files\Java\jdk1.8.0_151\jre\lib\ext\cldrdata.jar;C:\Program Files\Java\jdk1.8.0_151\jre\lib\ext\dnsns.jar;C:\Program Files\Java\jdk1.8.0_151\jre\lib\ext\jaccess.jar;C:\Program Files\Java\jdk1.8.0_151\jre\lib\ext\jfxrt.jar;C:\Program Files\Java\jdk1.8.0_151\jre\lib\ext\localedata.jar;C:\Program Files\Java\jdk1.8.0_151\jre\lib\ext\nashorn.jar;C:\Program Files\Java\jdk1.8.0_151\jre\lib\ext\sunec.jar;C:\Program Files\Java\jdk1.8.0_151\jre\lib\ext\sunjce_provider.jar;C:\Program Files\Java\jdk1.8.0_151\jre\lib\ext\sunmscapi.jar;C:\Program Files\Java\jdk1.8.0_151\jre\lib\ext\sunpkcs11.jar;C:\Program Files\Java\jdk1.8.0_151\jre\lib\ext\zipfs.jar;C:\Program Files\Java\jdk1.8.0_151\jre\lib\javaws.jar;C:\Program Files\Java\jdk1.8.0_151\jre\lib\jce.jar;C:\Program Files\Java\jdk1.8.0_151\jre\lib\jfr.jar;C:\Program Files\Java\jdk1.8.0_151\jre\lib\jfxswt.jar;C:\Program Files\Java\jdk1.8.0_151\jre\lib\jsse.jar;C:\Program Files\Java\jdk1.8.0_151\jre\lib\management-agent.jar;C:\Program Files\Java\jdk1.8.0_151\jre\lib\plugin.jar;C:\Program Files\Java\jdk1.8.0_151\jre\lib\resources.jar;C:\Program 
Files\Java\jdk1.8.0_151\jre\lib\rt.jar;C:\Users\vijub\IdeaProjects\sample\target\scala-2.11\classes;C:\Users\vijub\.ivy2\cache\aopalliance\aopalliance\jars\aopalliance-1.0.jar;C:\Users\vijub\.ivy2\cache\xmlenc\xmlenc\jars\xmlenc-0.52.jar;C:\Users\vijub\.ivy2\cache\oro\oro\jars\oro-2.0.8.jar;C:\Users\vijub\.ivy2\cache\org.xerial.snappy\snappy-java\bundles\snappy-java-1.1.2.4.jar;C:\Users\vijub\.ivy2\cache\org.tukaani\xz\jars\xz-1.0.jar;C:\Users\vijub\.ivy2\cache\org.spark-project.spark\unused\jars\unused-1.0.0.jar;C:\Users\vijub\.ivy2\cache\org.sonatype.sisu.inject\cglib\jars\cglib-2.2.1-v20090111.jar;C:\Users\vijub\.ivy2\cache\org.slf4j\slf4j-log4j12\jars\slf4j-log4j12-1.7.16.jar;C:\Users\vijub\.ivy2\cache\org.slf4j\slf4j-api\jars\slf4j-api-1.7.16.jar;C:\Users\vijub\.ivy2\cache\org.slf4j\jul-to-slf4j\jars\jul-to-slf4j-1.7.16.jar;C:\Users\vijub\.ivy2\cache\org.slf4j\jcl-over-slf4j\jars\jcl-over-slf4j-1.7.16.jar;C:\Users\vijub\.ivy2\cache\org.scalatest\scalatest_2.11\bundles\scalatest_2.11-2.2.6.jar;C:\Users\vijub\.ivy2\cache\org.scala-lang.modules\scala-xml_2.11\bundles\scala-xml_2.11-1.0.4.jar;C:\Users\vijub\.ivy2\cache\org.scala-lang.modules\scala-parser-combinators_2.11\bundles\scala-parser-combinators_2.11-1.0.4.jar;C:\Users\vijub\.ivy2\cache\org.scala-lang\scalap\jars\scalap-2.11.8.jar;C:\Users\vijub\.ivy2\cache\org.scala-lang\scala-reflect\jars\scala-reflect-2.11.8.jar;C:\Users\vijub\.ivy2\cache\org.scala-lang\scala-library\jars\scala-library-2.11.8.jar;C:\Users\vijub\.ivy2\cache\org.scala-lang\scala-compiler\jars\scala-compiler-2.11.8.jar;C:\Users\vijub\.ivy2\cache\org.roaringbitmap\RoaringBitmap\bundles\RoaringBitmap-0.5.11.jar;C:\Users\vijub\.ivy2\cache\org.objenesis\objenesis\jars\objenesis-2.1.jar;C:\Users\vijub\.ivy2\cache\org.mortbay.jetty\jetty-util\jars\jetty-util-6.1.26.jar;C:\Users\vijub\.ivy2\cache\org.json4s\json4s-jackson_2.11\jars\json4s-jackson_2.11-3.2.11.jar;C:\Users\vijub\.ivy2\cache\org.json4s\json4s-core_2.11\jars\json4s-core_2.11-3.2.11.ja
r;C:\Users\vijub\.ivy2\cache\org.json4s\json4s-ast_2.11\jars\json4s-ast_2.11-3.2.11.jar;C:\Users\vijub\.ivy2\cache\org.javassist\javassist\bundles\javassist-3.18.1-GA.jar;C:\Users\vijub\.ivy2\cache\org.glassfish.jersey.media\jersey-media-jaxb\jars\jersey-media-jaxb-2.22.2.jar;C:\Users\vijub\.ivy2\cache\org.glassfish.jersey.core\jersey-server\jars\jersey-server-2.22.2.jar;C:\Users\vijub\.ivy2\cache\org.glassfish.jersey.core\jersey-common\jars\jersey-common-2.22.2.jar;C:\Users\vijub\.ivy2\cache\org.glassfish.jersey.core\jersey-client\jars\jersey-client-2.22.2.jar;C:\Users\vijub\.ivy2\cache\org.glassfish.jersey.containers\jersey-container-servlet-core\jars\jersey-container-servlet-core-2.22.2.jar;C:\Users\vijub\.ivy2\cache\org.glassfish.jersey.containers\jersey-container-servlet\jars\jersey-container-servlet-2.22.2.jar;C:\Users\vijub\.ivy2\cache\org.glassfish.jersey.bundles.repackaged\jersey-guava\bundles\jersey-guava-2.22.2.jar;C:\Users\vijub\.ivy2\cache\org.glassfish.hk2.external\javax.inject\jars\javax.inject-2.4.0-b34.jar;C:\Users\vijub\.ivy2\cache\org.glassfish.hk2.external\aopalliance-repackaged\jars\aopalliance-repackaged-2.4.0-b34.jar;C:\Users\vijub\.ivy2\cache\org.glassfish.hk2\osgi-resource-locator\jars\osgi-resource-locator-1.0.1.jar;C:\Users\vijub\.ivy2\cache\org.glassfish.hk2\hk2-utils\jars\hk2-utils-2.4.0-b34.jar;C:\Users\vijub\.ivy2\cache\org.glassfish.hk2\hk2-locator\jars\hk2-locator-2.4.0-b34.jar;C:\Users\vijub\.ivy2\cache\org.glassfish.hk2\hk2-api\jars\hk2-api-2.4.0-b34.jar;C:\Users\vijub\.ivy2\cache\org.fusesource.leveldbjni\leveldbjni-all\bundles\leveldbjni-all-1.8.jar;C:\Users\vijub\.ivy2\cache\org.codehaus.janino\janino\jars\janino-2.7.8.jar;C:\Users\vijub\.ivy2\cache\org.codehaus.janino\commons-compiler\jars\commons-compiler-2.7.8.jar;C:\Users\vijub\.ivy2\cache\org.codehaus.jackson\jackson-mapper-asl\jars\jackson-mapper-asl-1.9.13.jar;C:\Users\vijub\.ivy2\cache\org.codehaus.jackson\jackson-core-asl\jars\jackson-core-asl-1.9.13.jar;C:\Users\vijub\
.ivy2\cache\org.apache.zookeeper\zookeeper\jars\zookeeper-3.4.5.jar;C:\Users\vijub\.ivy2\cache\org.apache.xbean\xbean-asm5-shaded\bundles\xbean-asm5-shaded-4.4.jar;C:\Users\vijub\.ivy2\cache\org.apache.spark\spark-unsafe_2.11\jars\spark-unsafe_2.11-2.0.0.jar;C:\Users\vijub\.ivy2\cache\org.apache.spark\spark-tags_2.11\jars\spark-tags_2.11-2.0.0.jar;C:\Users\vijub\.ivy2\cache\org.apache.spark\spark-sql_2.11\jars\spark-sql_2.11-2.0.0.jar;C:\Users\vijub\.ivy2\cache\org.apache.spark\spark-sketch_2.11\jars\spark-sketch_2.11-2.0.0.jar;C:\Users\vijub\.ivy2\cache\org.apache.spark\spark-network-shuffle_2.11\jars\spark-network-shuffle_2.11-2.0.0.jar;C:\Users\vijub\.ivy2\cache\org.apache.spark\spark-network-common_2.11\jars\spark-network-common_2.11-2.0.0.jar;C:\Users\vijub\.ivy2\cache\org.apache.spark\spark-launcher_2.11\jars\spark-launcher_2.11-2.0.0.jar;C:\Users\vijub\.ivy2\cache\org.apache.spark\spark-core_2.11\jars\spark-core_2.11-2.0.0.jar;C:\Users\vijub\.ivy2\cache\org.apache.spark\spark-catalyst_2.11\jars\spark-catalyst_2.11-2.0.0.jar;C:\Users\vijub\.ivy2\cache\org.apache.parquet\parquet-jackson\jars\parquet-jackson-1.7.0.jar;C:\Users\vijub\.ivy2\cache\org.apache.parquet\parquet-hadoop\jars\parquet-hadoop-1.7.0.jar;C:\Users\vijub\.ivy2\cache\org.apache.parquet\parquet-generator\jars\parquet-generator-1.7.0.jar;C:\Users\vijub\.ivy2\cache\org.apache.parquet\parquet-format\jars\parquet-format-2.3.0-incubating.jar;C:\Users\vijub\.ivy2\cache\org.apache.parquet\parquet-encoding\jars\parquet-encoding-1.7.0.jar;C:\Users\vijub\.ivy2\cache\org.apache.parquet\parquet-common\jars\parquet-common-1.7.0.jar;C:\Users\vijub\.ivy2\cache\org.apache.parquet\parquet-column\jars\parquet-column-1.7.0.jar;C:\Users\vijub\.ivy2\cache\org.apache.mesos\mesos\jars\mesos-0.21.1-shaded-protobuf.jar;C:\Users\vijub\.ivy2\cache\org.apache.ivy\ivy\jars\ivy-2.4.0.jar;C:\Users\vijub\.ivy2\cache\org.apache.hadoop\hadoop-yarn-server-common\jars\hadoop-yarn-server-common-2.2.0.jar;C:\Users\vijub\.ivy2\cache\o
rg.apache.hadoop\hadoop-yarn-common\jars\hadoop-yarn-common-2.2.0.jar;C:\Users\vijub\.ivy2\cache\org.apache.hadoop\hadoop-yarn-client\jars\hadoop-yarn-client-2.2.0.jar;C:\Users\vijub\.ivy2\cache\org.apache.hadoop\hadoop-yarn-api\jars\hadoop-yarn-api-2.2.0.jar;C:\Users\vijub\.ivy2\cache\org.apache.hadoop\hadoop-mapreduce-client-shuffle\jars\hadoop-mapreduce-client-shuffle-2.2.0.jar;C:\Users\vijub\.ivy2\cache\org.apache.hadoop\hadoop-mapreduce-client-jobclient\jars\hadoop-mapreduce-client-jobclient-2.2.0.jar;C:\Users\vijub\.ivy2\cache\org.apache.hadoop\hadoop-mapreduce-client-core\jars\hadoop-mapreduce-client-core-2.2.0.jar;C:\Users\vijub\.ivy2\cache\org.apache.hadoop\hadoop-mapreduce-client-common\jars\hadoop-mapreduce-client-common-2.2.0.jar;C:\Users\vijub\.ivy2\cache\org.apache.hadoop\hadoop-mapreduce-client-app\jars\hadoop-mapreduce-client-app-2.2.0.jar;C:\Users\vijub\.ivy2\cache\org.apache.hadoop\hadoop-hdfs\jars\hadoop-hdfs-2.2.0.jar;C:\Users\vijub\.ivy2\cache\org.apache.hadoop\hadoop-common\jars\hadoop-common-2.2.0.jar;C:\Users\vijub\.ivy2\cache\org.apache.hadoop\hadoop-client\jars\hadoop-client-2.2.0.jar;C:\Users\vijub\.ivy2\cache\org.apache.hadoop\hadoop-auth\jars\hadoop-auth-2.2.0.jar;C:\Users\vijub\.ivy2\cache\org.apache.hadoop\hadoop-annotations\jars\hadoop-annotations-2.2.0.jar;C:\Users\vijub\.ivy2\cache\org.apache.curator\curator-recipes\bundles\curator-recipes-2.4.0.jar;C:\Users\vijub\.ivy2\cache\org.apache.curator\curator-framework\bundles\curator-framework-2.4.0.jar;C:\Users\vijub\.ivy2\cache\org.apache.curator\curator-client\bundles\curator-client-2.4.0.jar;C:\Users\vijub\.ivy2\cache\org.apache.commons\commons-math3\jars\commons-math3-3.4.1.jar;C:\Users\vijub\.ivy2\cache\org.apache.commons\commons-math\jars\commons-math-2.1.jar;C:\Users\vijub\.ivy2\cache\org.apache.commons\commons-lang3\jars\commons-lang3-3.3.2.jar;C:\Users\vijub\.ivy2\cache\org.apache.commons\commons-compress\jars\commons-compress-1.4.1.jar;C:\Users\vijub\.ivy2\cache\org.apache.avro
\avro-mapred\jars\avro-mapred-1.7.7-hadoop2.jar;C:\Users\vijub\.ivy2\cache\org.apache.avro\avro-ipc\jars\avro-ipc-1.7.7-tests.jar;C:\Users\vijub\.ivy2\cache\org.apache.avro\avro-ipc\jars\avro-ipc-1.7.7.jar;C:\Users\vijub\.ivy2\cache\org.apache.avro\avro\jars\avro-1.7.7.jar;C:\Users\vijub\.ivy2\cache\org.antlr\antlr4-runtime\jars\antlr4-runtime-4.5.3.jar;C:\Users\vijub\.ivy2\cache\net.sf.py4j\py4j\jars\py4j-0.10.1.jar;C:\Users\vijub\.ivy2\cache\net.razorvine\pyrolite\jars\pyrolite-4.9.jar;C:\Users\vijub\.ivy2\cache\net.jpountz.lz4\lz4\jars\lz4-1.3.0.jar;C:\Users\vijub\.ivy2\cache\net.java.dev.jets3t\jets3t\jars\jets3t-0.7.1.jar;C:\Users\vijub\.ivy2\cache\log4j\log4j\bundles\log4j-1.2.17.jar;C:\Users\vijub\.ivy2\cache\javax.ws.rs\javax.ws.rs-api\jars\javax.ws.rs-api-2.0.1.jar;C:\Users\vijub\.ivy2\cache\javax.validation\validation-api\jars\validation-api-1.1.0.Final.jar;C:\Users\vijub\.ivy2\cache\javax.servlet\javax.servlet-api\jars\javax.servlet-api-3.1.0.jar;C:\Users\vijub\.ivy2\cache\javax.inject\javax.inject\jars\javax.inject-1.jar;C:\Users\vijub\.ivy2\cache\javax.annotation\javax.annotation-api\jars\javax.annotation-api-1.2.jar;C:\Users\vijub\.ivy2\cache\io.netty\netty-all\jars\netty-all-4.0.29.Final.jar;C:\Users\vijub\.ivy2\cache\io.netty\netty\bundles\netty-3.8.0.Final.jar;C:\Users\vijub\.ivy2\cache\io.dropwizard.metrics\metrics-jvm\bundles\metrics-jvm-3.1.2.jar;C:\Users\vijub\.ivy2\cache\io.dropwizard.metrics\metrics-json\bundles\metrics-json-3.1.2.jar;C:\Users\vijub\.ivy2\cache\io.dropwizard.metrics\metrics-graphite\bundles\metrics-graphite-3.1.2.jar;C:\Users\vijub\.ivy2\cache\io.dropwizard.metrics\metrics-core\bundles\metrics-core-3.1.2.jar;C:\Users\vijub\.ivy2\cache\commons-net\commons-net\jars\commons-net-2.2.jar;C:\Users\vijub\.ivy2\cache\commons-lang\commons-lang\jars\commons-lang-2.5.jar;C:\Users\vijub\.ivy2\cache\commons-io\commons-io\jars\commons-io-2.1.jar;C:\Users\vijub\.ivy2\cache\commons-httpclient\commons-httpclient\jars\commons-httpclient-3.1.jar
;C:\Users\vijub\.ivy2\cache\commons-digester\commons-digester\jars\commons-digester-1.8.jar;C:\Users\vijub\.ivy2\cache\commons-configuration\commons-configuration\jars\commons-configuration-1.6.jar;C:\Users\vijub\.ivy2\cache\commons-collections\commons-collections\jars\commons-collections-3.2.1.jar;C:\Users\vijub\.ivy2\cache\commons-codec\commons-codec\jars\commons-codec-1.10.jar;C:\Users\vijub\.ivy2\cache\commons-cli\commons-cli\jars\commons-cli-1.2.jar;C:\Users\vijub\.ivy2\cache\commons-beanutils\commons-beanutils-core\jars\commons-beanutils-core-1.8.0.jar;C:\Users\vijub\.ivy2\cache\commons-beanutils\commons-beanutils\jars\commons-beanutils-1.7.0.jar;C:\Users\vijub\.ivy2\cache\com.univocity\univocity-parsers\jars\univocity-parsers-2.1.1.jar;C:\Users\vijub\.ivy2\cache\com.twitter\chill_2.11\jars\chill_2.11-0.8.0.jar;C:\Users\vijub\.ivy2\cache\com.twitter\chill-java\jars\chill-java-0.8.0.jar;C:\Users\vijub\.ivy2\cache\com.thoughtworks.paranamer\paranamer\jars\paranamer-2.6.jar;C:\Users\vijub\.ivy2\cache\com.ning\compress-lzf\bundles\compress-lzf-1.0.3.jar;C:\Users\vijub\.ivy2\cache\com.google.protobuf\protobuf-java\bundles\protobuf-java-2.5.0.jar;C:\Users\vijub\.ivy2\cache\com.google.inject\guice\jars\guice-3.0.jar;C:\Users\vijub\.ivy2\cache\com.google.guava\guava\bundles\guava-14.0.1.jar;C:\Users\vijub\.ivy2\cache\com.google.code.findbugs\jsr305\jars\jsr305-1.3.9.jar;C:\Users\vijub\.ivy2\cache\com.fasterxml.jackson.module\jackson-module-scala_2.11\bundles\jackson-module-scala_2.11-2.6.5.jar;C:\Users\vijub\.ivy2\cache\com.fasterxml.jackson.module\jackson-module-paranamer\bundles\jackson-module-paranamer-2.6.5.jar;C:\Users\vijub\.ivy2\cache\com.fasterxml.jackson.core\jackson-databind\bundles\jackson-databind-2.6.5.jar;C:\Users\vijub\.ivy2\cache\com.fasterxml.jackson.core\jackson-core\bundles\jackson-core-2.6.5.jar;C:\Users\vijub\.ivy2\cache\com.fasterxml.jackson.core\jackson-annotations\bundles\jackson-annotations-2.6.5.jar;C:\Users\vijub\.ivy2\cache\com.esotericsoft
ware\minlog\bundles\minlog-1.3.0.jar;C:\Users\vijub\.ivy2\cache\com.esotericsoftware\kryo-shaded\bundles\kryo-shaded-3.0.3.jar;C:\Users\vijub\.ivy2\cache\com.clearspring.analytics\stream\jars\stream-2.7.0.jar" DataFrame_Tutuorial
Using Spark's default log4j profile: org/apache/spark/log4j-defaults.properties
18/02/23 22:09:06 INFO SparkContext: Running Spark version 2.0.0
18/02/23 22:09:07 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
18/02/23 22:09:07 INFO SecurityManager: Changing view acls to: vijub
18/02/23 22:09:07 INFO SecurityManager: Changing modify acls to: vijub
18/02/23 22:09:07 INFO SecurityManager: Changing view acls groups to:
18/02/23 22:09:07 INFO SecurityManager: Changing modify acls groups to:
18/02/23 22:09:07 INFO SecurityManager: SecurityManager: authentication disabled; ui acls disabled; users with view permissions: Set(vijub); groups with view permissions: Set(); users with modify permissions: Set(vijub); groups with modify permissions: Set()
18/02/23 22:09:08 INFO Utils: Successfully started service 'sparkDriver' on port 61414.
18/02/23 22:09:08 INFO SparkEnv: Registering MapOutputTracker
18/02/23 22:09:08 INFO SparkEnv: Registering BlockManagerMaster
18/02/23 22:09:08 INFO DiskBlockManager: Created local directory at C:\Users\vijub\AppData\Local\Temp\blockmgr-4f3b4559-5909-493e-8fd1-050860a6d5e4
18/02/23 22:09:08 INFO MemoryStore: MemoryStore started with capacity 901.8 MB
18/02/23 22:09:09 INFO SparkEnv: Registering OutputCommitCoordinator
18/02/23 22:09:09 INFO Utils: Successfully started service 'SparkUI' on port 4040.
18/02/23 22:09:09 INFO SparkUI: Bound SparkUI to 0.0.0.0, and started at http://192.168.56.1:4040
18/02/23 22:09:09 INFO Executor: Starting executor ID driver on host localhost
18/02/23 22:09:09 INFO Utils: Successfully started service 'org.apache.spark.network.netty.NettyBlockTransferService' on port 61435.
18/02/23 22:09:09 INFO NettyBlockTransferService: Server created on 192.168.56.1:61435
18/02/23 22:09:09 INFO BlockManagerMaster: Registering BlockManager BlockManagerId(driver, 192.168.56.1, 61435)
18/02/23 22:09:09 INFO BlockManagerMasterEndpoint: Registering block manager 192.168.56.1:61435 with 901.8 MB RAM, BlockManagerId(driver, 192.168.56.1, 61435)
18/02/23 22:09:09 INFO BlockManagerMaster: Registered BlockManager BlockManagerId(driver, 192.168.56.1, 61435)
18/02/23 22:09:10 WARN SparkContext: Use an existing SparkContext, some configuration may not take effect.
18/02/23 22:09:10 INFO SharedState: Warehouse path is 'file:C:\Users\vijub\IdeaProjects\sample/spark-warehouse'.
18/02/23 22:09:13 INFO MemoryStore: Block broadcast_0 stored as values in memory (estimated size 59.6 KB, free 901.7 MB)
18/02/23 22:09:13 INFO MemoryStore: Block broadcast_0_piece0 stored as bytes in memory (estimated size 14.3 KB, free 901.7 MB)
18/02/23 22:09:13 INFO BlockManagerInfo: Added broadcast_0_piece0 in memory on 192.168.56.1:61435 (size: 14.3 KB, free: 901.8 MB)
18/02/23 22:09:13 INFO SparkContext: Created broadcast 0 from csv at DataFrame_Tutuorial.scala:7
18/02/23 22:09:13 INFO FileInputFormat: Total input paths to process : 1
18/02/23 22:09:13 INFO SparkContext: Starting job: csv at DataFrame_Tutuorial.scala:7
18/02/23 22:09:13 INFO DAGScheduler: Got job 0 (csv at DataFrame_Tutuorial.scala:7) with 1 output partitions
18/02/23 22:09:13 INFO DAGScheduler: Final stage: ResultStage 0 (csv at DataFrame_Tutuorial.scala:7)
18/02/23 22:09:13 INFO DAGScheduler: Parents of final stage: List()
18/02/23 22:09:13 INFO DAGScheduler: Missing parents: List()
18/02/23 22:09:13 INFO DAGScheduler: Submitting ResultStage 0 (MapPartitionsRDD[2] at csv at DataFrame_Tutuorial.scala:7), which has no missing parents
18/02/23 22:09:13 INFO MemoryStore: Block broadcast_1 stored as values in memory (estimated size 3.3 KB, free 901.7 MB)
18/02/23 22:09:13 INFO MemoryStore: Block broadcast_1_piece0 stored as bytes in memory (estimated size 2024.0 B, free 901.7 MB)
18/02/23 22:09:13 INFO BlockManagerInfo: Added broadcast_1_piece0 in memory on 192.168.56.1:61435 (size: 2024.0 B, free: 901.8 MB)
18/02/23 22:09:13 INFO SparkContext: Created broadcast 1 from broadcast at DAGScheduler.scala:1012
18/02/23 22:09:13 INFO DAGScheduler: Submitting 1 missing tasks from ResultStage 0 (MapPartitionsRDD[2] at csv at DataFrame_Tutuorial.scala:7)
18/02/23 22:09:13 INFO TaskSchedulerImpl: Adding task set 0.0 with 1 tasks
18/02/23 22:09:14 INFO TaskSetManager: Starting task 0.0 in stage 0.0 (TID 0, localhost, partition 0, PROCESS_LOCAL, 5375 bytes)
18/02/23 22:09:14 INFO Executor: Running task 0.0 in stage 0.0 (TID 0)
18/02/23 22:09:14 INFO HadoopRDD: Input split: file:/C:/Users/vijub/Desktop/input1.txt:0+78080
18/02/23 22:09:14 INFO deprecation: mapred.tip.id is deprecated. Instead, use mapreduce.task.id
18/02/23 22:09:14 INFO deprecation: mapred.task.id is deprecated. Instead, use mapreduce.task.attempt.id
18/02/23 22:09:14 INFO deprecation: mapred.task.is.map is deprecated. Instead, use mapreduce.task.ismap
18/02/23 22:09:14 INFO deprecation: mapred.task.partition is deprecated. Instead, use mapreduce.task.partition
18/02/23 22:09:14 INFO deprecation: mapred.job.id is deprecated. Instead, use mapreduce.job.id
18/02/23 22:09:14 INFO Executor: Finished task 0.0 in stage 0.0 (TID 0). 925 bytes result sent to driver
18/02/23 22:09:14 INFO TaskSetManager: Finished task 0.0 in stage 0.0 (TID 0) in 187 ms on localhost (1/1)
18/02/23 22:09:14 INFO TaskSchedulerImpl: Removed TaskSet 0.0, whose tasks have all completed, from pool
18/02/23 22:09:14 INFO DAGScheduler: ResultStage 0 (csv at DataFrame_Tutuorial.scala:7) finished in 0.210 s
18/02/23 22:09:14 INFO DAGScheduler: Job 0 finished: csv at DataFrame_Tutuorial.scala:7, took 0.321394 s
18/02/23 22:09:14 INFO MemoryStore: Block broadcast_2 stored as values in memory (estimated size 127.2 KB, free 901.6 MB)
18/02/23 22:09:14 INFO MemoryStore: Block broadcast_2_piece0 stored as bytes in memory (estimated size 14.3 KB, free 901.6 MB)
18/02/23 22:09:14 INFO BlockManagerInfo: Added broadcast_2_piece0 in memory on 192.168.56.1:61435 (size: 14.3 KB, free: 901.8 MB)
18/02/23 22:09:14 INFO SparkContext: Created broadcast 2 from csv at DataFrame_Tutuorial.scala:7
18/02/23 22:09:14 INFO FileInputFormat: Total input paths to process : 1
18/02/23 22:09:14 INFO SparkContext: Starting job: csv at DataFrame_Tutuorial.scala:7
18/02/23 22:09:14 INFO DAGScheduler: Got job 1 (csv at DataFrame_Tutuorial.scala:7) with 1 output partitions
18/02/23 22:09:14 INFO DAGScheduler: Final stage: ResultStage 1 (csv at DataFrame_Tutuorial.scala:7)
18/02/23 22:09:14 INFO DAGScheduler: Parents of final stage: List()
18/02/23 22:09:14 INFO DAGScheduler: Missing parents: List()
18/02/23 22:09:14 INFO DAGScheduler: Submitting ResultStage 1 (MapPartitionsRDD[5] at csv at DataFrame_Tutuorial.scala:7), which has no missing parents
18/02/23 22:09:14 INFO MemoryStore: Block broadcast_3 stored as values in memory (estimated size 3.3 KB, free 901.6 MB)
18/02/23 22:09:14 INFO MemoryStore: Block broadcast_3_piece0 stored as bytes in memory (estimated size 2016.0 B, free 901.6 MB)
18/02/23 22:09:14 INFO BlockManagerInfo: Added broadcast_3_piece0 in memory on 192.168.56.1:61435 (size: 2016.0 B, free: 901.8 MB)
18/02/23 22:09:14 INFO SparkContext: Created broadcast 3 from broadcast at DAGScheduler.scala:1012
18/02/23 22:09:14 INFO DAGScheduler: Submitting 1 missing tasks from ResultStage 1 (MapPartitionsRDD[5] at csv at DataFrame_Tutuorial.scala:7)
18/02/23 22:09:14 INFO TaskSchedulerImpl: Adding task set 1.0 with 1 tasks
18/02/23 22:09:14 INFO TaskSetManager: Starting task 0.0 in stage 1.0 (TID 1, localhost, partition 0, PROCESS_LOCAL, 5375 bytes)
18/02/23 22:09:14 INFO Executor: Running task 0.0 in stage 1.0 (TID 1)
18/02/23 22:09:14 INFO HadoopRDD: Input split: file:/C:/Users/vijub/Desktop/input1.txt:0+78080
18/02/23 22:09:14 INFO Executor: Finished task 0.0 in stage 1.0 (TID 1). 1012 bytes result sent to driver
18/02/23 22:09:14 INFO TaskSetManager: Finished task 0.0 in stage 1.0 (TID 1) in 49 ms on localhost (1/1)
18/02/23 22:09:14 INFO DAGScheduler: ResultStage 1 (csv at DataFrame_Tutuorial.scala:7) finished in 0.050 s
18/02/23 22:09:14 INFO TaskSchedulerImpl: Removed TaskSet 1.0, whose tasks have all completed, from pool
18/02/23 22:09:14 INFO DAGScheduler: Job 1 finished: csv at DataFrame_Tutuorial.scala:7, took 0.093121 s
18/02/23 22:09:14 INFO BlockManagerInfo: Removed broadcast_1_piece0 on 192.168.56.1:61435 in memory (size: 2024.0 B, free: 901.8 MB)
18/02/23 22:09:14 INFO BlockManagerInfo: Removed broadcast_3_piece0 on 192.168.56.1:61435 in memory (size: 2016.0 B, free: 901.8 MB)
18/02/23 22:09:14 INFO SparkContext: Starting job: csv at DataFrame_Tutuorial.scala:7
18/02/23 22:09:14 INFO DAGScheduler: Got job 2 (csv at DataFrame_Tutuorial.scala:7) with 2 output partitions
18/02/23 22:09:14 INFO DAGScheduler: Final stage: ResultStage 2 (csv at DataFrame_Tutuorial.scala:7)
18/02/23 22:09:14 INFO DAGScheduler: Parents of final stage: List()
18/02/23 22:09:14 INFO DAGScheduler: Missing parents: List()
18/02/23 22:09:14 INFO DAGScheduler: Submitting ResultStage 2 (MapPartitionsRDD[6] at csv at DataFrame_Tutuorial.scala:7), which has no missing parents
18/02/23 22:09:14 INFO MemoryStore: Block broadcast_4 stored as values in memory (estimated size 4.7 KB, free 901.6 MB)
18/02/23 22:09:14 INFO MemoryStore: Block broadcast_4_piece0 stored as bytes in memory (estimated size 2.9 KB, free 901.6 MB)
18/02/23 22:09:14 INFO BlockManagerInfo: Added broadcast_4_piece0 in memory on 192.168.56.1:61435 (size: 2.9 KB, free: 901.8 MB)
18/02/23 22:09:14 INFO SparkContext: Created broadcast 4 from broadcast at DAGScheduler.scala:1012
18/02/23 22:09:14 INFO DAGScheduler: Submitting 2 missing tasks from ResultStage 2 (MapPartitionsRDD[6] at csv at DataFrame_Tutuorial.scala:7)
18/02/23 22:09:14 INFO TaskSchedulerImpl: Adding task set 2.0 with 2 tasks
18/02/23 22:09:14 INFO TaskSetManager: Starting task 0.0 in stage 2.0 (TID 2, localhost, partition 0, PROCESS_LOCAL, 5380 bytes)
18/02/23 22:09:14 INFO TaskSetManager: Starting task 1.0 in stage 2.0 (TID 3, localhost, partition 1, PROCESS_LOCAL, 5380 bytes)
18/02/23 22:09:14 INFO Executor: Running task 0.0 in stage 2.0 (TID 2)
18/02/23 22:09:14 INFO Executor: Running task 1.0 in stage 2.0 (TID 3)
18/02/23 22:09:14 INFO HadoopRDD: Input split: file:/C:/Users/vijub/Desktop/input1.txt:0+78080
18/02/23 22:09:14 INFO HadoopRDD: Input split: file:/C:/Users/vijub/Desktop/input1.txt:78080+78081
18/02/23 22:09:14 INFO Executor: Finished task 0.0 in stage 2.0 (TID 2). 1077 bytes result sent to driver
18/02/23 22:09:14 INFO TaskSetManager: Finished task 0.0 in stage 2.0 (TID 2) in 98 ms on localhost (1/2)
18/02/23 22:09:14 INFO Executor: Finished task 1.0 in stage 2.0 (TID 3). 990 bytes result sent to driver
18/02/23 22:09:14 INFO TaskSetManager: Finished task 1.0 in stage 2.0 (TID 3) in 102 ms on localhost (2/2)
18/02/23 22:09:14 INFO TaskSchedulerImpl: Removed TaskSet 2.0, whose tasks have all completed, from pool
18/02/23 22:09:14 INFO DAGScheduler: ResultStage 2 (csv at DataFrame_Tutuorial.scala:7) finished in 0.105 s
18/02/23 22:09:14 INFO DAGScheduler: Job 2 finished: csv at DataFrame_Tutuorial.scala:7, took 0.118861 s
Exception in thread "main" java.lang.IllegalArgumentException: java.net.URISyntaxException: Relative path in absolute URI: file:C:/Users/vijub/IdeaProjects/sample/spark-warehouse
at org.apache.hadoop.fs.Path.initialize(Path.java:206)
at org.apache.hadoop.fs.Path.<init>(Path.java:172)
at org.apache.spark.sql.catalyst.catalog.SessionCatalog.makeQualifiedPath(SessionCatalog.scala:114)
at org.apache.spark.sql.catalyst.catalog.SessionCatalog.createDatabase(SessionCatalog.scala:145)
at org.apache.spark.sql.catalyst.catalog.SessionCatalog.<init>(SessionCatalog.scala:89)
at org.apache.spark.sql.internal.SessionState.catalog$lzycompute(SessionState.scala:95)
at org.apache.spark.sql.internal.SessionState.catalog(SessionState.scala:95)
at org.apache.spark.sql.internal.SessionState$$anon$1.<init>(SessionState.scala:112)
at org.apache.spark.sql.internal.SessionState.analyzer$lzycompute(SessionState.scala:112)
at org.apache.spark.sql.internal.SessionState.analyzer(SessionState.scala:111)
at org.apache.spark.sql.execution.QueryExecution.assertAnalyzed(QueryExecution.scala:49)
at org.apache.spark.sql.Dataset$.ofRows(Dataset.scala:64)
at org.apache.spark.sql.SparkSession.baseRelationToDataFrame(SparkSession.scala:382)
at org.apache.spark.sql.DataFrameReader.load(DataFrameReader.scala:143)
at org.apache.spark.sql.DataFrameReader.csv(DataFrameReader.scala:401)
at org.apache.spark.sql.DataFrameReader.csv(DataFrameReader.scala:342)
at DataFrame_Tutuorial$.main(DataFrame_Tutuorial.scala:7)
at DataFrame_Tutuorial.main(DataFrame_Tutuorial.scala)
Caused by: java.net.URISyntaxException: Relative path in absolute URI: file:C:/Users/vijub/IdeaProjects/sample/spark-warehouse
at java.net.URI.checkPath(URI.java:1823)
at java.net.URI.<init>(URI.java:745)
at org.apache.hadoop.fs.Path.initialize(Path.java:203)
... 17 more
18/02/23 22:09:15 INFO SparkContext: Invoking stop() from shutdown hook
18/02/23 22:09:15 INFO SparkUI: Stopped Spark web UI at http://192.168.56.1:4040
18/02/23 22:09:15 INFO MapOutputTrackerMasterEndpoint: MapOutputTrackerMasterEndpoint stopped!
18/02/23 22:09:15 INFO MemoryStore: MemoryStore cleared
18/02/23 22:09:15 INFO BlockManager: BlockManager stopped
18/02/23 22:09:15 INFO BlockManagerMaster: BlockManagerMaster stopped
18/02/23 22:09:15 INFO OutputCommitCoordinator$OutputCommitCoordinatorEndpoint: OutputCommitCoordinator stopped!
18/02/23 22:09:15 INFO SparkContext: Successfully stopped SparkContext
18/02/23 22:09:15 INFO ShutdownHookManager: Shutdown hook called
18/02/23 22:09:15 INFO ShutdownHookManager: Deleting directory C:\Users\vijub\AppData\Local\Temp\spark-273b8193-1435-4768-b480-915c6bb334bb
Process finished with exit code 1