我无法重复使用通过执行 mapPartitions 得到的新 RDD。只有在 mapPartitions 之后再添加一个 reduceByKey 转换(而我并不想执行 reduceByKey)时,代码似乎才能正常工作。由于 reduceByKey 和 mapPartitions 都是转换操作,因此我不确定导致错误的原因。
# Reproduction script from the question (Python 2 / PySpark).
# `sc` (the SparkContext) and `foo` (the asker's partition function) are not
# defined in this snippet.
C = [x for x in xrange(2**20)]
C = sc.parallelize(C)
while True:
    C = C.repartition(1000)\
         .mapPartitions(foo)\
         .persist()
    # NOTE(review): take(1) presumably returns a list, so comparing it with 0
    # would never be true -- confirm against the PySpark RDD.take docs. The
    # condition is kept exactly as asked because it is the subject of the question.
    if C.take(1) == 0:
        break
报告错误:
TypeError: can't pickle listiterator objects
答案 0 :(得分:0)
我认为错误是由你从 mapPartitions 返回的内容引起的。如果返回的是一个新的 RDD,我看不出代码会引发异常的原因。以下是我基于您的代码编写的代码段。
// Lookup table from a marker's "type" attribute to the image file used as
// that marker's icon.
var customIcons = {
  type1: { icon: 'icon_type1.png' },
  type2: { icon: 'icon_type2.png' },
  type3: { icon: 'icon_type3.png' },
  type4: { icon: 'icon_type4.png' }
};
/**
 * Creates the map in the #map element, downloads the marker XML feed and adds
 * one marker per <marker> element, grouped by a MarkerClusterer.
 *
 * Clicking a marker opens a shared info window showing the marker's name and
 * address and writes the name into the #setlatlng element.
 *
 * Values read from the feed are HTML-escaped before being placed into the
 * info window markup and are written with textContent, so markup inside the
 * feed cannot be injected into the page.
 */
function initMap() {
  var cluster = [];
  var map = new google.maps.Map(document.getElementById("map"), {
    center: new google.maps.LatLng(0, 0),
    zoom: 1,
    mapTypeId: 'roadmap'
  });
  var infowindow = new google.maps.InfoWindow();

  // Escapes a feed value for safe use inside an HTML string; a missing
  // attribute (null/undefined) becomes the empty string.
  function escapeHtml(value) {
    var text = value == null ? '' : String(value);
    return text
      .replace(/&/g, '&amp;')
      .replace(/</g, '&lt;')
      .replace(/>/g, '&gt;')
      .replace(/"/g, '&quot;')
      .replace(/'/g, '&#39;');
  }

  // Registers the click behaviour for one marker. Being a separate function,
  // each call captures its own marker/name/address, so no loop-index closure
  // is needed.
  function attachClickHandler(marker, name, address) {
    var html = "<b>" + escapeHtml(name) + "</b> <br/>" + escapeHtml(address);
    google.maps.event.addListener(marker, 'click', function() {
      infowindow.setContent(html);
      infowindow.open(map, marker);
      // Publishes the clicked marker's name in #setlatlng (per the original
      // comment, server-side code consumes it). textContent keeps the value
      // from being parsed as HTML.
      document.getElementById("setlatlng").textContent = name == null ? '' : name;
    });
  }

  // Change this depending on the name of your PHP file
  downloadUrl('https://my-website.com/the-sweet-sweet-xml-info.php', function(data, status) {
    var xml = data.responseXML;
    // responseXML is null when the request failed or the body was not XML.
    if (!xml || !xml.documentElement) {
      if (typeof console !== 'undefined' && console.error) {
        console.error('initMap: marker feed did not return XML (HTTP status ' + status + ')');
      }
      return;
    }
    var markers = xml.documentElement.getElementsByTagName("marker");
    for (var i = 0; i < markers.length; i++) {
      var node = markers[i];
      var point = new google.maps.LatLng(
          parseFloat(node.getAttribute("lat")),
          parseFloat(node.getAttribute("lng")));
      // Unknown types fall back to an empty object, i.e. the default icon.
      var icon = customIcons[node.getAttribute("type")] || {};
      var marker = new google.maps.Marker({
        map: map,
        position: point,
        icon: icon.icon
      });
      attachClickHandler(marker, node.getAttribute("name"), node.getAttribute("address"));
      cluster.push(marker);
    }
    var options = {
      imagePath: '/location-of-cluster-icons/m'
    };
    new MarkerClusterer(map, cluster, options);
  });
}
/**
 * Makes a marker show the given info window when it is clicked.
 *
 * @param {Object} marker - Marker that receives the 'click' listener.
 * @param {Object} map - Map the info window is opened on.
 * @param {Object} infoWindow - Info window whose content is set and which is opened.
 * @param {string} html - Content passed to infoWindow.setContent on each click.
 */
function bindInfoWindow(marker, map, infoWindow, html) {
  var showInfoWindow = function() {
    infoWindow.setContent(html);
    infoWindow.open(map, marker);
  };
  google.maps.event.addListener(marker, 'click', showInfoWindow);
}
/**
 * Sends an asynchronous GET request and passes the finished request to a
 * callback.
 *
 * @param {string} url - Address to request.
 * @param {function(Object, number)} callback - Invoked once, when readyState
 *     reaches 4, with the request object and its HTTP status.
 */
function downloadUrl(url, callback) {
  // Prefer the native XMLHttpRequest; fall back to the ActiveX control only
  // where the native object is missing. The ProgID must be the XMLHTTP
  // control's name, not the URL of the feed.
  var request = window.XMLHttpRequest ?
      new XMLHttpRequest() :
      new ActiveXObject('Microsoft.XMLHTTP');
  request.onreadystatechange = function() {
    if (request.readyState === 4) {
      // Swap in the no-op handler so the callback cannot run a second time.
      request.onreadystatechange = doNothing;
      callback(request, request.status);
    }
  };
  request.open('GET', url, true);
  request.send(null);
}
/**
 * Deliberate no-op, used as a replacement event handler once a request has
 * completed.
 */
function doNothing() {
  return;
}
# Recommended version: the driver loop that follows this function tests for an
# empty RDD with isEmpty().
def f(it):
    """Yield the sum of one partition, but only if it holds more than one element.

    it: iterator over the numeric elements of a single partition.
    Yields the total exactly once when more than one element was consumed;
    yields nothing for an empty or single-element partition.
    """
    total = 0
    count = 0
    for x in it:
        total += x
        count += 1
    if count > 1:
        yield total
# Driver loop for the answer's example. `sc` is assumed to be an existing
# SparkContext and `f` the partition function defined above; neither is
# created in this snippet.
C = sc.parallelize([x for x in range(100)])
while True:
    C = C.repartition(10)\
         .mapPartitions(f)
    # Stop once a pass produces no elements at all.
    if C.isEmpty():
        break
最好使用 isEmpty() 而不是 take(1) == 0 来检查 RDD 是否为空。