我遇到了“存在多个 SparkContext”的错误。
有人可以帮我解决这个问题吗?
如果我执行 parsing.take(1),
代码可以正常运行。但是当我在代码的最后一行使用 take > 2
时,就会报“多个 SparkContext”的错误。
非常感谢任何帮助
from pyspark import SparkConf
from pyspark import SparkContext
# Driver-side Spark entry point; every RDD below is created from this context.
sc = SparkContext()
from pyspark.sql import SQLContext
sqlContext = SQLContext(sc)
############ IRIS DataSet ##############
# Load the CSV from HDFS as an RDD of text lines.
iris= sc.textFile("hdfs:///user/edureka/IRIS.csv")
# Random split with weights 1:2 -- the first split is the test set, the
# second the training set.
testset,trainingset = iris.randomSplit([1,2])
import numpy as np
def parse_interaction(line, symbolic_indexes=(4,)):
    """Parse one CSV line into a NumPy array of its feature columns.

    Args:
        line: A comma-separated record.
        symbolic_indexes: Zero-based column positions to leave out of the
            result. Defaults to column 4, the column this script treats as
            the label.

    Returns:
        A ``np.ndarray`` of floats holding every column whose position is
        not in ``symbolic_indexes``, in the original column order.

    Raises:
        ValueError: If a kept column cannot be converted to ``float``.
    """
    skipped = frozenset(symbolic_indexes)
    return np.array(
        [
            float(item)
            for i, item in enumerate(line.split(","))
            if i not in skipped
        ]
    )
def parse_interaction_label(line, symbolic_indexes=(4,)):
    """Parse one CSV line into a NumPy array of its label column(s).

    This is the complement of ``parse_interaction``: it keeps only the
    columns listed in ``symbolic_indexes`` instead of dropping them.

    Args:
        line: A comma-separated record.
        symbolic_indexes: Zero-based column positions to keep. Defaults to
            column 4.

    Returns:
        A ``np.ndarray`` of floats with the selected columns in their
        original order; empty when the line has no such column.

    Raises:
        ValueError: If a selected column cannot be converted to ``float``
            (for example a label that is still a text class name).
    """
    wanted = frozenset(symbolic_indexes)
    return np.array(
        [
            float(item)
            for i, item in enumerate(line.split(","))
            if i in wanted
        ]
    )
# Derive separate feature-array and label-array RDDs for each split.
features_train = trainingset.map(parse_interaction)
labels_train = trainingset.map(parse_interaction_label)
features_test=testset.map(parse_interaction)
labels_test=testset.map(parse_interaction_label)
def parse_interaction_with_key(line):
    """Parse one CSV line into a NumPy array of all of its columns.

    Unlike ``parse_interaction``, no column is dropped, so the result holds
    the feature columns followed by whatever else the line contains.

    Args:
        line: A comma-separated record.

    Returns:
        A ``np.ndarray`` of floats, one entry per column.

    Raises:
        ValueError: If any column cannot be converted to ``float``.
    """
    return np.array([float(item) for item in line.split(",")])
# Parse full rows (all columns) for both splits.
features_train_label = trainingset.map(parse_interaction_with_key)
features_test_label= testset.map(parse_interaction_with_key)
# Pair every training row with every test row: (training_row, test_row).
product=features_train_label.cartesian(features_test_label)
import math
def distancecal(line, n_features=4):
    """Score a (training row, test row) pair by Euclidean distance.

    Args:
        line: A pair ``(training_row, test_row)``. Each row is a numeric
            sequence whose first ``n_features`` entries are the features.
        n_features: Number of leading columns used for the distance.
            Defaults to 4.

    Returns:
        A tuple ``(key, scored_row)`` where ``key`` is ``str(test_row)``
        (a string, so it can be used as a grouping key) and ``scored_row``
        is the full training row with the distance appended as its last
        element.
    """
    training_row, test_row = line[0], line[1]
    squared_diffs = (
        (a - b) ** 2
        for a, b in zip(training_row[:n_features], test_row[:n_features])
    )
    score = math.sqrt(sum(squared_diffs))
    # np.append returns a new array; the input training row is not modified.
    return (str(test_row), np.append(training_row, score))
# Score every (training_row, test_row) pair; records become
# (str(test_row), training_row_with_distance).
training_label_test_score = product.map(distancecal)
# Gather, per test-row key, the list of all scored training rows.
keyvalue=training_label_test_score.groupByKey().mapValues(list)
def sortingvalue(l):
    """Return the rows of ``l`` ordered by ascending distance score.

    Args:
        l: An iterable of rows (lists or arrays); the element at index 5 of
            each row is the sort key.

    Returns:
        A new list with the same rows, sorted by ``row[5]``. Rows with equal
        keys keep their relative order.

    Raises:
        IndexError: If a row has fewer than six elements.
    """
    # Sort in plain Python on purpose: this function is called from inside
    # an RDD map, and creating a SparkContext there fails because only one
    # SparkContext may be active at a time.
    return sorted(l, key=lambda row: row[5])
def parsekeyvalueforsorting(line):
    """Sort the value list of one ``(key, values)`` record.

    Args:
        line: A pair whose first element is the key and whose second
            element is the collection of rows to sort.

    Returns:
        A tuple ``(key, sorted_rows)`` where ``sorted_rows`` is the result
        of passing the rows to ``sortingvalue``.
    """
    return (line[0], sortingvalue(line[1]))
# Sort each key's list of scored rows, then print the first two records.
parsing=keyvalue.map(parsekeyvalueforsorting)
print(parsing.take(2))
这是我的列表,第一个元素是字符串,第二个是数组列表:
[('[ 0.2 1.4 3.4 5.2 0. ]', [array([ 0.2, 1.4, 3. , 4.9, 0. , **0.5**]), array([ 0.2 , 1.3 , 3.2 , 4.7 , 0. ,**0.54772256**]), array([ 0.2 , 1.4 , 3.6 , 5. , 0. ,
0.28284271]), array([ 0.4 , 1.7 , 3.9 , 5.4 , 0. ,
0.64807407]), array([ 0.2 , 1.5 , 3.4 , 5. , 0. ,
0.2236068]), array([ 0.2 , 1.4 , 2.9 , 4.4 , 0. ,
0.94339811]), array([ 0.1 , 1.5 , 3.1 , 4.9 , 0. ,
0.4472136]), array([ 0.2 , 1.5 , 3.7 , 5.4 , 0. ,
0.37416574]), array([ 0.2 , 1.6 , 3.4 , 4.8 , 0. ,
0.4472136]), array([ 0.1 , 1.4 , 3. , 4.8 , 0. ,
0.57445626]), array([ 0.1 , 1.1 , 3. , 4.3 , 0. ,
1.03440804]), array([ 0.4 , 1.5 , 4.4 , 5.7 , 0. ,
1.14017543]), array([ 0.4 , 1.3 , 3.9 , 5.4 , 0. ,
0.58309519]), array([ 0.3 , 1.7 , 3.8 , 5.7 , 0. ,
0.71414284]), array([ 0.3 , 1.5 , 3.8 , 5.1 , 0. ,
0.43588989]), array([ 0.2 , 1.7 , 3.4 , 5.4 , 0. ,
0.36055513]), array([ 0.4 , 1.5 , 3.7 , 5.1 , 0. ,
0.38729833]), array([ 0.2 , 1. , 3.6 , 4.6 , 0. ,
0.74833148]), array([ 0.5 , 1.7 , 3.3 , 5.1 , 0. ,
0.4472136]), array([ 0.2 , 1.9 , 3.4 , 4.8 , 0. ,
0.64031242]), array([ 0.2 , 1.6 , 3. , 5. , 0. ,
0.48989795]), array([ 0.4 , 1.6 , 3.4 , 5. , 0. ,
0.34641016]), array([ 0.2 , 1.5 , 3.5 , 5.2 , 0. ,
0.14142136]), array([ 0.4, 1.5, 3.4, 5.4, 0. , 0.3]), array([ 0.2 , 1.5 , 3.1 , 4.9 , 0. ,
0.43588989]), array([ 0.2 , 1.2 , 3.2 , 5. , 0. ,
0.34641016]), array([ 0.2 , 1.3 , 3.5 , 5.5 , 0. ,
0.33166248]), array([ 0.2 , 1.5 , 3.4 , 5.1 , 0. ,
0.14142136]), array([ 0.3 , 1.3 , 2.3 , 4.5 , 0. ,
1.3114877]), array([ 0.4 , 1.9 , 3.8 , 5.1 , 0. , 0.678233]), array([ 0.3 , 1.4 , 3. , 4.8 , 0. ,
0.57445626]), array([ 0.2 , 1.6 , 3.8 , 5.1 , 0. ,
0.45825757]), array([ 0.2 , 1.4 , 3.2 , 4.6 , 0. ,
0.63245553]), array([ 0.2 , 1.5 , 3.7 , 5.3 , 0. ,
0.33166248]), array([ 0.2 , 1.4 , 3.3 , 5. , 0. ,
0.2236068]), array([ 1.3 , 4. , 2.3 , 5.5 , 1. ,
3.04466747]), array([ 1.5 , 4.6 , 2.8 , 6.5 , 1. ,
3.73898382]), array([ 1.3 , 4.6 , 2.9 , 6.6 , 1. ,
3.69594372]), array([ 1.4 , 3.9 , 2.7 , 5.2 , 1. ,
2.86006993]), array([ 1.5 , 4.2 , 3. , 5.9 , 1. ,
3.19061123]), array([ 1. , 4. , 2.2 , 6. , 1. ,
3.07896086]), array([ 1.3 , 3.6 , 2.9 , 5.6 , 1. ,
2.54165301]), array([ 1.5 , 4.5 , 3. , 5.6 , 1. ,
3.40881211]), array([ 1. , 4.1 , 2.7 , 5.8 , 1. ,
2.96310648]), array([ 1.5 , 4.5 , 2.2 , 6.2 , 1. ,
3.7067506]), array([ 1.3 , 4. , 2.8 , 6.1 , 1. ,
3.02324329]), array([ 1.5 , 4.9 , 2.5 , 6.3 , 1. ,
3.99499687]), array([ 1.2 , 4.7 , 2.8 , 6.1 , 1. ,
3.6138622]), array([ 1.3 , 4.3 , 2.9 , 6.4 , 1. ,
3.36303434]), array([ 1.4 , 4.8 , 2.8 , 6.8 , 1. ,
3.98998747]), array([ 1.7 , 5. , 3. , 6.7 , 1. ,
4.19761837]), array([ 1.5 , 4.5 , 2.9 , 6. , 1. ,
3.49141805]), array([ 1. , 3.5 , 2.6 , 5.7 , 1. ,
2.43721152]), array([ 1.1 , 3.8 , 2.4 , 5.5 , 1. ,
2.7676705]), array([ 1. , 3.7 , 2.4 , 5.5 , 1. ,
2.64952826]), array([ 1.2 , 3.9 , 2.7 , 5.8 , 1. ,
2.84604989]), array([ 1.6 , 5.1 , 2.7 , 6. , 1. ,
4.09633983]), array([ 1.5 , 4.5 , 3. , 5.4 , 1. ,
3.39116499]), array([ 1.3 , 4.4 , 2.3 , 6.3 , 1. ,
3.55387113]), array([ 1.3 , 4. , 2.5 , 5.5 , 1. ,
2.97825452]), array([ 1.2 , 4.4 , 2.6 , 5.5 , 1. ,
3.27566787]), array([ 1. , 3.3 , 2.3 , 5. , 1. ,
2.34520788]), array([ 1.3 , 4.2 , 2.7 , 5.6 , 1. ,
3.1144823]), array([ 1.2 , 4.2 , 3. , 5.7 , 1. ,
3.04138127]), array([ 1.3 , 4.3 , 2.9 , 6.2 , 1. ,
3.2969683]), array([ 2.5 , 6. , 3.3 , 6.3 , 2. ,
5.26022813]), array([ 1.9 , 5.1 , 2.7 , 5.8 , 2. ,
4.17492515]), array([ 2.1 , 5.9 , 3. , 7.1 , 2. ,
5.25642464]), array([ 1.8 , 5.6 , 2.9 , 6.3 , 2. ,
4.65403051]), array([ 2.2 , 5.8 , 3. , 6.5 , 2. ,
5.02095608]), array([ 1.8 , 6.3 , 2.9 , 7.3 , 2. ,
5.5883808]), array([ 1.8 , 5.8 , 2.5 , 6.7 , 2. ,
4.9979996]), array([ 2.5 , 6.1 , 3.6 , 7.2 , 2. ,
5.60535458]), array([ 2. , 5.1 , 3.2 , 6.5 , 2. ,
4.31972221]), array([ 1.9 , 5.3 , 2.7 , 6.4 , 2. ,
4.4754888]), array([ 2.1 , 5.5 , 3. , 6.8 , 2. ,
4.81040539]), array([ 2. , 5. , 2.5 , 5.7 , 2. ,
4.15451562]), array([ 1.8 , 5.5 , 3. , 6.5 , 2. ,
4.60651712]), array([ 2.2 , 6.7 , 3.8 , 7.7 , 2. ,
6.20483682]), array([ 2.3 , 6.9 , 2.6 , 7.7 , 2. ,
6.44592895]), array([ 1.5 , 5. , 2.2 , 6. , 2. ,
4.09023227]), array([ 2. , 4.9 , 2.8 , 5.6 , 2. ,
4.0012498]), array([ 1.8 , 4.9 , 2.7 , 6.3 , 2. ,
4.06324993]), array([ 1.8 , 6. , 3.2 , 7.2 , 2. ,
5.26877595]), array([ 1.6 , 5.8 , 3. , 7.2 , 2. ,
5.04777179]), array([ 2. , 6.4 , 3.8 , 7.9 , 2. ,
5.97411081]), array([ 2.2 , 5.6 , 2.8 , 6.4 , 2. ,
4.84148737]), array([ 1.5 , 5.1 , 2.8 , 6.3 , 2. ,
4.11703777]), array([ 2.3 , 6.1 , 3. , 7.7 , 2. ,
5.7367238]), array([ 2.4 , 5.6 , 3.4 , 6.3 , 2. ,
4.86723741]), array([ 1.8 , 5.5 , 3.1 , 6.4 , 2. ,
4.57165178]), array([ 2.4 , 5.6 , 3.1 , 6.7 , 2. ,
4.98196748]), array([ 2.3 , 5.1 , 3.1 , 6.9 , 2. ,
4.59129611]), array([ 2.3 , 5.9 , 3.2 , 6.8 , 2. ,
5.22111099]), array([ 2.5 , 5.7 , 3.3 , 6.7 , 2. ,
5.10294033]), array([ 2.3 , 5.2 , 3. , 6.7 , 2. ,
4.61085675]), array([ 1.9 , 5. , 2.5 , 6.3 , 2. ,
4.22729228]), array([ 2.3 , 5.4 , 3.4 , 6.2 , 2. ,
4.62709412]), array([ 1.8 , 5.1 , 3. , 5.9 , 2. ,
4.11096096])]), ('[ 0.3 1.4 3.4 4.6 0. ]', [array([ 0.2 , 1.4 , 3. , 4.9 , 0. ,
0.50990195]), array([ 0.2 , 1.3 , 3.2 , 4.7 , 0. ,
0.26457513]), array([ 0.2 , 1.4 , 3.6 , 5. , 0. ,
0.45825757]), array([ 0.4 , 1.7 , 3.9 , 5.4 , 0. ,
0.99498744]), array([ 0.2 , 1.5 , 3.4 , 5. , 0. ,
0.42426407]), array([ 0.2 , 1.4 , 2.9 , 4.4 , 0. ,
0.54772256]), array([ 0.1 , 1.5 , 3.1 , 4.9 , 0. ,
0.47958315]), array([ 0.2 , 1.5 , 3.7 , 5.4 , 0. ,
0.8660254]), array([ 0.2, 1.6, 3.4, 4.8, 0. , 0.3]), array([ 0.1 , 1.4 , 3. , 4.8 , 0. ,
0.48989795]), array([ 0.1 , 1.1 , 3. , 4.3 , 0. ,
0.6164414]), array([ 0.4 , 1.5 , 4.4 , 5.7 , 0. ,
1.49331845]), array([ 0.4 , 1.3 , 3.9 , 5.4 , 0. ,
0.9539392]), array([ 0.3 , 1.7 , 3.8 , 5.7 , 0. ,
1.2083046]), array([ 0.3 , 1.5 , 3.8 , 5.1 , 0. ,
0.64807407]), array([ 0.2 , 1.7 , 3.4 , 5.4 , 0. ,
0.86023253]), array([ 0.4, 1.5, 3.7, 5.1, 0. , 0.6]), array([ 0.2 , 1. , 3.6 , 4.6 , 0. ,
0.45825757]), array([ 0.5 , 1.7 , 3.3 , 5.1 , 0. ,
0.6244998]), array([ 0.2 , 1.9 , 3.4 , 4.8 , 0. ,
0.54772256]), array([ 0.2 , 1.6 , 3. , 5. , 0. ,
0.60827625]), array([ 0.4 , 1.6 , 3.4 , 5. , 0. ,
0.45825757]), array([ 0.2 , 1.5 , 3.5 , 5.2 , 0. ,
0.6244998]), array([ 0.4 , 1.5 , 3.4 , 5.4 , 0. ,
0.81240384]), array([ 0.2 , 1.5 , 3.1 , 4.9 , 0. ,
0.4472136]), array([ 0.2, 1.2, 3.2, 5. , 0. , 0.5]), array([ 0.2 , 1.3 , 3.5 , 5.5 , 0. ,
0.91651514]), array([ 0.2 , 1.5 , 3.4 , 5.1 , 0. ,
0.51961524]), array([ 0.3 , 1.3 , 2.3 , 4.5 , 0. ,
1.10905365]), array([ 0.4 , 1.9 , 3.8 , 5.1 , 0. ,
0.81853528]), array([ 0.3 , 1.4 , 3. , 4.8 , 0. ,
0.4472136]), array([ 0.2 , 1.6 , 3.8 , 5.1 , 0. , 0.678233]), array([ 0.2 , 1.4 , 3.2 , 4.6 , 0. ,
0.2236068]), array([ 0.2 , 1.5 , 3.7 , 5.3 , 0. ,
0.77459667]), array([ 0.2 , 1.4 , 3.3 , 5. , 0. ,
0.42426407]), array([ 1.3 , 4. , 2.3 , 5.5 , 1. ,
3.12729915]), array([ 1.5 , 4.6 , 2.8 , 6.5 , 1. ,
3.95600809]), array([ 1.3 , 4.6 , 2.9 , 6.6 , 1. ,
3.93573373]), array([ 1.4 , 3.9 , 2.7 , 5.2 , 1. ,
2.88270706]), array([ 1.5 , 4.2 , 3. , 5.9 , 1. ,
3.33616546]), array([ 1. , 4. , 2.2 , 6. , 1. ,
3.26343377]), array([ 1.3 , 3.6 , 2.9 , 5.6 , 1. ,
2.66270539]), array([ 1.5 , 4.5 , 3. , 5.6 , 1. ,
3.49428104]), array([ 1. , 4.1 , 2.7 , 5.8 , 1. ,
3.11608729]), array([ 1.5 , 4.5 , 2.2 , 6.2 , 1. ,
3.87943295]), array([ 1.3 , 4. , 2.8 , 6.1 , 1. ,
3.22024844]), array([ 1.5 , 4.9 , 2.5 , 6.3 , 1. ,
4.17013189]), array([ 1.2 , 4.7 , 2.8 , 6.1 , 1. ,
3.78285606]), array([ 1.3 , 4.3 , 2.9 , 6.4 , 1. , 3.591657]), array([ 1.4 , 4.8 , 2.8 , 6.8 , 1. ,
4.23910368]), array([ 1.7 , 5. , 3. , 6.7 , 1. ,
4.41474801]), array([ 1.5 , 4.5 , 2.9 , 6. , 1. ,
3.64142829]), array([ 1. , 3.5 , 2.6 , 5.7 , 1. ,
2.59807621]), array([ 1.1 , 3.8 , 2.4 , 5.5 , 1. ,
2.86530976]), array([ 1. , 3.7 , 2.4 , 5.5 , 1. ,
2.75499546]), array([ 1.2 , 3.9 , 2.7 , 5.8 , 1. ,
2.99833287]), array([ 1.6 , 5.1 , 2.7 , 6. , 1. ,
4.22255847]), array([ 1.5 , 4.5 , 3. , 5.4 , 1. ,
3.4423829]), array([ 1.3 , 4.4 , 2.3 , 6.3 , 1. ,
3.75499667]), array([ 1.3 , 4. , 2.5 , 5.5 , 1. ,
3.06267857]), array([ 1.2 , 4.4 , 2.6 , 5.5 , 1. ,
3.35559235]), array([ 1. , 3.3 , 2.3 , 5. , 1. ,
2.33880311]), array([ 1.3 , 4.2 , 2.7 , 5.6 , 1. ,
3.21403174]), array([ 1.2 , 4.2 , 3. , 5.7 , 1. ,
3.16543836]), array([ 1.3 , 4.3 , 2.9 , 6.2 , 1. ,
3.49571166]), array([ 2.5 , 6. , 3.3 , 6.3 , 2. ,
5.37587202]), array([ 1.9 , 5.1 , 2.7 , 5.8 , 2. ,
4.26380112]), array([ 2.1 , 5.9 , 3. , 7.1 , 2. ,
5.46808925]), array([ 1.8 , 5.6 , 2.9 , 6.3 , 2. ,
4.79895822]), array([ 2.2 , 5.8 , 3. , 6.5 , 2. ,
5.17107339]), array([ 1.8 , 6.3 , 2.9 , 7.3 , 2. ,
5.81377674]), array([ 1.8 , 5.8 , 2.5 , 6.7 , 2. ,
5.17976833]), array([ 2.5 , 6.1 , 3.6 , 7.2 , 2. ,
5.80775344]), array([ 2. , 5.1 , 3.2 , 6.5 , 2. ,
4.49777723]), array([ 1.9 , 5.3 , 2.7 , 6.4 , 2. ,
4.63680925]), array([ 2.1 , 5.5 , 3. , 6.8 , 2. ,
5.0049975]), array([ 2. , 5. , 2.5 , 5.7 , 2. ,
4.22729228]), array([ 1.8 , 5.5 , 3. , 6.5 , 2. ,
4.77807493]), array([ 2.2 , 6.7 , 3.8 , 7.7 , 2. ,
6.43972049]), array([ 2.3 , 6.9 , 2.6 , 7.7 , 2. ,
6.67083203]), array([ 1.5 , 5. , 2.2 , 6. , 2. ,
4.21900462]), array([ 2. , 4.9 , 2.8 , 5.6 , 2. ,
4.0620192]), array([ 1.8 , 4.9 , 2.7 , 6.3 , 2. ,
4.2284749]), array([ 1.8 , 6. , 3.2 , 7.2 , 2. ,
5.49636243]), array([ 1.6 , 5.8 , 3. , 7.2 , 2. ,
5.28866713]), array([ 2. , 6.4 , 3.8 , 7.9 , 2. ,
6.2401923]), array([ 2.2 , 5.6 , 2.8 , 6.4 , 2. ,
4.98497743]), array([ 1.5 , 5.1 , 2.8 , 6.3 , 2. ,
4.28719022]), array([ 2.3 , 6.1 , 3. , 7.7 , 2. ,
5.98832197]), array([ 2.4 , 5.6 , 3.4 , 6.3 , 2. ,
4.9939964]), array([ 1.8 , 5.5 , 3.1 , 6.4 , 2. ,
4.73180727]), array([ 2.4 , 5.6 , 3.1 , 6.7 , 2. ,
5.15266921]), array([ 2.3 , 5.1 , 3.1 , 6.9 , 2. ,
4.80312398]), array([ 2.3 , 5.9 , 3.2 , 6.8 , 2. ,
5.39722151]), array([ 2.5 , 5.7 , 3.3 , 6.7 , 2. ,
5.26782688]), array([ 2.3 , 5.2 , 3. , 6.7 , 2. ,
4.79687398]), array([ 1.9 , 5. , 2.5 , 6.3 , 2. ,
4.38406204]), array([ 2.3 , 5.4 , 3.4 , 6.2 , 2. ,
4.74973683]), array([ 1.8 , 5.1 , 3. , 5.9 , 2. ,
4.21781934])])]
我需要根据所有列表中的粗体值(第5个元素)对数组的第二个元素进行排序
答案 0 :(得分:0)
有两个隐藏的问题:
答案:出现此错误的原因是:
http://spark.apache.org/docs/latest/programming-guide.html
每个 JVM 中只能有一个处于活动状态的 SparkContext。在创建新的 SparkContext 之前,必须先对当前活动的 SparkContext 调用 stop()。
问题是由于在 map
操作 parsing=keyvalue.map(parsekeyvalueforsorting) 内部创建 SparkContext
引起的
您正在为每一行创建spark-context。
这是在map
操作中调用的函数,它为keyvalue
中的每一行创建一个新的spark上下文。
def sortingvalue(l):
from pyspark import SparkConf
from pyspark import SparkContext
# This is the offending line: the function runs once per record inside the
# map, so each call tries to create another SparkContext.
sc1 = SparkContext()
v = sc1.parallelize(l)
vSorted = v.sortBy(lambda a: a[5])
return(vSorted.collect())
简要看了一下你的代码,这个函数似乎只是接收一行数据,然后对它进行排序。这完全可以用纯 Python 轻松完成,而不需要 Spark。
在函数内部创建 SparkContext 违反了 Spark 的要求:"每个JVM只有一个SparkContext可能处于活动状态"
如何解决问题?
将此函数重写为纯python应解决您的错误。
为了使用python对数组列表进行排序,可以使用python sorted()
函数和key,它将指向数组中的第5个元素:
sorted(l, key=lambda x: x[4])
示例输入:
>>> myin = [([ 0.2,1.4,3.,4.9,0.3,0.5]), ([ 0.2,1.3,3.2,4.7,0.4,0.54772256]),([0.2,1.4,3.6,5.,0.1,0.28284271]), ([0.4,1.7,3.9,5.4,0.7,0.64807407]), ([0.2,1.5,3.4,5.,0.6,0.2236068])]
结果:
>>> sorted(myin,key = lambda x: x[4])
[[0.2, 1.4, 3.6, 5.0, 0.1, 0.28284271], [0.2, 1.4, 3.0, 4.9, 0.3, 0.5], [0.2, 1.3, 3.2, 4.7, 0.4, 0.54772256], [0.2, 1.5
, 3.4, 5.0, 0.6, 0.2236068], [0.4, 1.7, 3.9, 5.4, 0.7, 0.64807407]]
>>>
的更多信息
请注意,您的输入包含单词" array"应将其删除以便在sorted()