from pyspark import SparkContext
from pyspark.sql import SparkSession
from pyspark.sql.types import *
import os
os.environ['PYSPARK_SUBMIT_ARGS'] = '--packages com.databricks:spark-xml_2.10:0.4.1 pyspark-shell'
conf = SparkConf().setAppName('Stackoverflow')
sc = SparkContext(master="local", appName="test")
sc.setLogLevel("Error")
spark = SparkSession.builder.getOrCreate()
df=spark.read.format("com.databricks.spark.xml").option("rowTag","Transaction").load("C:/Users/Rajaraman/Desktop/task/data/transactions.xml")
无法回溯(最近通话最近): 在第6行的文件“ C:/Users/Rajaraman/test.py” conf = SparkConf()。setAppName('Stackoverflow') NameError:名称“ SparkConf”未定义
好心地查看
答案 0 :(得分:0)
您需要导入代码中引用的库
添加此行以导入引用的包
from pyspark import SparkConf, SparkContext