我有一个pandas数据帧
id no_of_rows
1 2689
2 1515
3 3826
4 814
5 1650
6 2292
7 1867
8 2096
9 1618
10 923
11 766
12 191
我想根据每个 id 的行数(no_of_rows)把这些 id 分到 5 个不同的箱(bin)中,
使每个箱内的总行数大致相等,
并把箱号写入新列 bin。
我认为的一种方法是
df.no_of_rows.sum() = 20247
div_factor = 20247//5 == 4049
如果我们添加第1行和第2行,则其总和= 2689 + 1515 = 4204> div_factor。 因此,指定bin = 1,其中id = 1。 现在寻找下一个
id no_of_rows bin
1 2689 1
2 1515 2
3 3826 3
4 814 4
5 1650 4
6 2292 5
7 1867
8 2096
9 1618
10 923
11 766
12 191
但这种方法被证明是错误的。
有没有办法把这些 id 分成 5 个箱,使每个箱的总行数大致相等?
答案 0 :(得分:0)
您可以使用基于百分位数的方法。
<script>
/**
 * Switches the visible map tab.
 *
 * Hides every element with class "tabcontent_map", removes the "active" class
 * from every element with class "tablinks_map", then shows the pane whose id is
 * `mapName` and marks the element that dispatched the event as active.
 *
 * @param {Event} evt - Event from the tab button; its currentTarget gets the "active" class.
 * @param {string} mapName - id of the pane element to display.
 */
function openMap(evt, mapName) {
  var i;

  // Hide all tab panes.
  var tabcontent = document.getElementsByClassName("tabcontent_map");
  for (i = 0; i < tabcontent.length; i++) {
    tabcontent[i].style.display = "none";
  }

  // Clear the "active" marker from all tab buttons. classList removes exactly
  // the "active" token, wherever it sits in the class attribute.
  var tablinks = document.getElementsByClassName("tablinks_map");
  for (i = 0; i < tablinks.length; i++) {
    tablinks[i].classList.remove("active");
  }

  // Show the requested pane and mark the button that opened it.
  document.getElementById(mapName).style.display = "block";
  evt.currentTarget.classList.add("active");

  // This function can run before the Google Maps API has loaded or before the
  // map has been created (e.g. when a tab is clicked programmatically at page
  // load). Only notify the map of the size change once both exist; otherwise
  // the call would throw a ReferenceError.
  if (typeof google !== "undefined" && typeof mapEtranger !== "undefined" && mapEtranger) {
    google.maps.event.trigger(mapEtranger, 'resize');
  }
}
</script>
<!-- Tab bar: each button calls openMap() with the id of the pane it should reveal. -->
<div class="tab_map">
<!-- The button with id "defaultOpen" is looked up by id and clicked from script elsewhere on the page. -->
<button id="defaultOpen" class="tablinks_map" onclick="openMap(event, 'France')">ETA en France</button>
<button class="tablinks_map" onclick="openMap(event, 'Etranger')">ETA à l'étranger</button>
</div>
<!-- Tab panes: openMap() shows/hides these by class "tabcontent_map"; each inner div is the container a google.maps.Map is created in. -->
<div id="France" class="tabcontent_map">
<div id="mapFrance"></div>
</div>
<div id="Etranger" class="tabcontent_map">
<div id="mapEtranger"></div>
</div>
<script>
// Map for the "Etranger" pane. Declared at script scope because it is assigned
// in initMaps() and also read outside of it.
var mapEtranger;

/**
 * Builds both Google maps and their markers.
 *
 * Creates one map in the element with id "mapFrance" and one in the element
 * with id "mapEtranger" (stored in the script-level `mapEtranger`), both at
 * zoom 6 centred on the same coordinates. Adds one marker per location entry
 * and wires a click listener that shows the entry's label in a single
 * InfoWindow shared by all markers.
 */
function initMaps() {
  var centre_france = { lat: 46.599923, lng: 2.432420 };

  // Both maps use identical options; only the container element differs.
  function createMap(containerId) {
    return new google.maps.Map(document.getElementById(containerId), {
      zoom: 6,
      center: centre_france
    });
  }

  var mapFrance = createMap('mapFrance');
  mapEtranger = createMap('mapEtranger');

  // Each entry is [label, latitude, longitude].
  var locationsFrance = [
    ['Bureaux à Plérin', 48.541236, -2.778751],
    ['Bureaux à Arras', 50.290853, 2.777303],
    ['Bureaux à Grenoble', 45.184039, 5.722752],
    ['Sarthe', 48.003300, 0.206723],
    ['Gironde', 44.836597, -0.581157],
    ['Vaucluse', 43.876778, 5.397163],
    ['Paris', 48.856579, 2.351521]
  ];
  var locationsEtranger = [
    ['Bureaux à Plérin', 48.541236, -2.778751],
    ['Bureaux à Arras', 50.290853, 2.777303],
    ['Bureaux à Grenoble', 45.184039, 5.722752],
    ['Nouvelle-Zélande', -42.425169, 172.390052],
    ['Guyane', 4.648057, -52.817376],
    ['République Dominicaine', 18.895404, -70.224045]
  ];

  // One InfoWindow instance is reused for every marker on both maps.
  var infowindow = new google.maps.InfoWindow();

  // Drops a marker on `targetMap` for every entry and opens the shared
  // InfoWindow with the entry's label when the marker is clicked.
  function placeMarkers(targetMap, entries) {
    entries.forEach(function (entry) {
      var pin = new google.maps.Marker({
        position: new google.maps.LatLng(entry[1], entry[2]),
        map: targetMap
      });
      google.maps.event.addListener(pin, 'click', function () {
        infowindow.setContent(entry[0]);
        infowindow.open(targetMap, pin);
      });
    });
  }

  placeMarkers(mapFrance, locationsFrance);
  placeMarkers(mapEtranger, locationsEtranger);
}
</script>
<!-- Loads the Google Maps JavaScript API asynchronously; the "callback=initMaps" query parameter names the function the API calls once it has loaded. -->
<script async defer src="https://maps.googleapis.com/maps/api/js?key=AIzaSyCIADRO5W2CiaE48H4A4Mt8KFUt0eHtc4M&callback=initMaps"></script>
<script>
// Simulate a click on the default tab button so that one tab is selected as soon as the page is parsed.
// NOTE(review): this runs synchronously, while the Maps API script above loads with async/defer,
// so the click handler may execute before `google` is defined — confirm the handler tolerates that.
document.getElementById("defaultOpen").click();
</script>
然后你可以查看
n_bins = 5
# Running totals with the smallest row counts first; cumsum() is applied to the
# whole frame, so every column of df must be numeric.
dfa = df.sort_values(by='no_of_rows').cumsum()
# Scale the running total of no_of_rows onto [0, n_bins] and truncate to get a
# bin label. The last row scales to exactly n_bins, which would create an extra
# (n_bins + 1)-th bin, so cap the labels at n_bins - 1 (labels are 0 .. n_bins-1).
# The assignment aligns on the index, so each id receives its own label.
df['bin'] = (n_bins * dfa.no_of_rows / dfa.no_of_rows.max()).astype(int).clip(upper=n_bins - 1)
记录越多,各箱之间的行数分配就越均衡。