根据条件划分不同容器中的数据帧

时间:2018-01-31 12:50:38

标签: pandas

我有一个pandas数据帧

id  no_of_rows
1  2689
2  1515
3  3826
4  814
5  1650
6  2292
7  1867
8  2096
9  1618
10  923
11  766
12  191

我想根据每个 id 的行数(no_of_rows)把这些 id 分到 5 个不同的箱子(bin)中,使每个箱子的行数总和大致相等,并把结果写入新列 bin。

我认为的一种方法是

df.no_of_rows.sum() = 20247
div_factor = 20247//5 == 4049

如果把第1行和第2行相加,则其总和 = 2689 + 1515 = 4204 > div_factor。因此只给 id = 1 指定 bin = 1,然后继续处理下一行。

id  no_of_rows  bin
1  2689          1
2  1515          2
3  3826          3
4  814           4
5  1650          4
6  2292          5
7  1867
8  2096
9  1618
10  923
11  766
12  191

但这种方法被证明是错误的。

有没有办法分成5个箱子,使每个箱子的行数总和大致相等?

1 个答案:

答案 0 :(得分:0)

您可以使用基于百分位数的方法。

<script>function openMap(evt, mapName) {
  // Declare all variables
  var i, tabcontent, tablinks;

  // Get all elements with class="tabcontent_map" and hide them
  tabcontent = document.getElementsByClassName("tabcontent_map");
  for (i = 0; i < tabcontent.length; i++) {
    tabcontent[i].style.display = "none";
  }

  // Get all elements with class="tablinks_map" and remove the class "active"
  tablinks = document.getElementsByClassName("tablinks_map");
  for (i = 0; i < tablinks.length; i++) {
    tablinks[i].className = tablinks[i].className.replace(" active", "");
  }

  // Show the current tab, and add an "active" class to the link that opened the tab
  document.getElementById(mapName).style.display = "block";
  evt.currentTarget.className += " active";
  
  google.maps.event.trigger(mapEtranger,'resize')

}
</script>
<div class="tab_map">
  <button id="defaultOpen" class="tablinks_map" onclick="openMap(event, 'France')">ETA en France</button>
  <button class="tablinks_map" onclick="openMap(event, 'Etranger')">ETA à l'étranger</button>
</div>
<div id="France" class="tabcontent_map">
  <div id="mapFrance"></div>
</div>
<div id="Etranger" class="tabcontent_map">
  <div id="mapEtranger"></div>
</div>
<script>
// Map shown in the "Etranger" tab. Declared outside initMaps() because
// openMap() also references it; it stays undefined until initMaps() runs.
var mapEtranger;
  /**
   * Maps API callback: builds the two maps and drops a clickable marker on
   * each listed location.
   *
   * Creates one map in #mapFrance and one in #mapEtranger (the latter stored
   * in the outer `mapEtranger` variable), both centred on the same point at
   * zoom 6. A single InfoWindow is shared by all markers; clicking a marker
   * sets its content to the location label and opens it on that marker's map.
   */
  function initMaps() {
    var franceCentre = {
      lat: 46.599923,
      lng: 2.432420
    };

    // Both maps start with the same centre and zoom level.
    function createMap(containerId) {
      return new google.maps.Map(document.getElementById(containerId), {
        zoom: 6,
        center: franceCentre
      });
    }

    var mapFrance = createMap('mapFrance');
    mapEtranger = createMap('mapEtranger');

    // Each entry is [label, latitude, longitude].
    var franceSites = [
      ['Bureaux à Plérin', 48.541236, -2.778751],
      ['Bureaux à Arras', 50.290853, 2.777303],
      ['Bureaux à Grenoble', 45.184039, 5.722752],
      ['Sarthe', 48.003300, 0.206723],
      ['Gironde', 44.836597, -0.581157],
      ['Vaucluse', 43.876778, 5.397163],
      ['Paris', 48.856579, 2.351521]
    ];
    var abroadSites = [
      ['Bureaux à Plérin', 48.541236, -2.778751],
      ['Bureaux à Arras', 50.290853, 2.777303],
      ['Bureaux à Grenoble', 45.184039, 5.722752],
      ['Nouvelle-Zélande', -42.425169, 172.390052],
      ['Guyane', 4.648057, -52.817376],
      ['République Dominicaine', 18.895404, -70.224045]
    ];

    var infowindow = new google.maps.InfoWindow();

    // Place one marker per site on targetMap and wire its click handler.
    function plotSites(sites, targetMap) {
      sites.forEach(function (site) {
        var pin = new google.maps.Marker({
          position: new google.maps.LatLng(site[1], site[2]),
          map: targetMap
        });
        google.maps.event.addListener(pin, 'click', function () {
          infowindow.setContent(site[0]);
          infowindow.open(targetMap, pin);
        });
      });
    }

    plotSites(franceSites, mapFrance);
    plotSites(abroadSites, mapEtranger);
  }

</script>
<script async defer src="https://maps.googleapis.com/maps/api/js?key=AIzaSyCIADRO5W2CiaE48H4A4Mt8KFUt0eHtc4M&callback=initMaps"></script>
<script>
  // Programmatically click the #defaultOpen button so its onclick handler
  // (openMap for the 'France' tab) runs as soon as this script executes.
  // NOTE(review): the Maps script above is loaded async/defer, so this may
  // run before `google` is defined — confirm openMap tolerates that.
  document.getElementById("defaultOpen").click();
</script>

然后你可以查看

n_bins = 5
# Running totals, accumulated in ascending order of no_of_rows.
dfa = df.sort_values(by='no_of_rows').cumsum()
# Read the maximum running total once instead of once per row inside the lambda.
total_rows = dfa.no_of_rows.max()
# Map each running total to a 0-based bin. The row whose running total equals
# total_rows would otherwise get int(n_bins * 1.0) == n_bins, which opens an
# extra (n_bins + 1)-th bin, so the result is capped at n_bins - 1.
df['bin'] = dfa.no_of_rows.apply(lambda x: min(int(n_bins*x/total_rows), n_bins - 1))

记录越多,各个箱子之间的分配就越均匀。