在特定的公差范围内使用pandas groupby

时间:2018-07-23 19:51:47

标签: python pandas pandas-groupby

我有一个看起来像这样的数据框:

var express = require('express')
var app = express()
var http = require('http')
var Raven = require('raven')
var shippo = require('shippo')('ACCESS_TOKEN')
var engines = require('consolidate')
const bodyParser = require('body-parser')
const path = require('path')
// Register body parsers: URL-encoded form bodies (with `extended: true`) first, then JSON.
const urlencodedParser = bodyParser.urlencoded({ extended: true });
const jsonParser = bodyParser.json();
app.use(urlencodedParser);
app.use(jsonParser);

// Render views with the `.html` extension through consolidate's mustache engine.
app.engine('html', engines.mustache);
app.set('view engine', 'html');
// Static file serving is currently disabled.
// NOTE(review): `_dirname` below looks like a typo for Node's `__dirname` - confirm before re-enabling.
//app.use(express.static(path.join(_dirname,'/')))


// GET / : render the Index.html view as the landing page.
app.get('/', (req, res) => {
  res.render('Index.html');
});
/**
 * POST / : create a Shippo shipment from the hard-coded example sender,
 * recipient and parcel below, and reply with the outcome as JSON.
 *
 * The request body is not read; every value sent to Shippo is a literal
 * defined in this handler.
 *
 * Responds with the created shipment on success, or with HTTP 500 and a
 * generic error payload when the Shippo call fails.
 */
app.post('/', (req, res) => {
  // example address_from object dict
  const addressFrom = {
    "name": "SENDER_NAME",
    "company": "Shippo",
    "street1": "215 Clayton St.",
    "city": "San Francisco",
    "state": "CA",
    "zip": "94117",
    "country": "US", // iso2 country code
    "phone": "+1 555 341 9393",
    "email": "SENDER_EMAIL"
  };

  // example address_to object dict
  const addressTo = {
    "name": "Mr Hippo",
    "street1": "Broadway 1",
    "city": "New York",
    "state": "NY",
    "zip": "10007",
    "country": "US",
    "phone": "+1 555 341 9393",
    "email": "mrhippo@goshippo.com"
  };

  // parcel object dict
  const parcel = {
    "length": "5",
    "width": "5",
    "height": "5",
    "distance_unit": "in",
    "weight": "2",
    "mass_unit": "lb"
  };

  // The create call is asynchronous: wait for it to settle before answering,
  // so the client neither hangs forever nor misses a failure.
  shippo.shipment
    .create({
      "address_from": addressFrom,
      "address_to": addressTo,
      "parcels": [parcel],
      "carrier_account": 'CARRIER_TOKEN',
      "async": false
    })
    .then(
      (shipment) => {
        res.json(shipment);
      },
      (err) => {
        // Log the full error server-side; send only a generic message to the client.
        console.error('Shippo shipment creation failed:', err);
        res.status(500).json({ error: 'Unable to create shipment' });
      }
    );
});

// Start the HTTP server once, at module load. This must live outside the POST
// handler, otherwise a second request would try to bind the port again.
app.listen(3000, () => {
  console.log('Example app listening on port 3000!');
});

对于N的每个匹配值,我想将具有相似Z值(在+/- .1内)的S值之和作为最大S值。这是我想要它做的一个例子:

 N  S     Z
 1  4  2.00
 1  5  2.02
 1  1  3.00
 1  3  4.00
 1  7  1.90
 2  2  3.20
 2  3  2.80
 2  2  1.50
 3  1  2.50
 3  3  2.10
 3  6  3.30
 3  7  3.20

我的想法是做类似 N S 1 11 2 3 3 13 的事情,但这不包括Z的公差。

1 个答案:

答案 0 :(得分:0)

首先,您提供的答案与您提供的描述不符。 从描述中,我希望下面的代码可以工作:

# For every group of N, slide a window of +/- Z_TOLERANCE around each row's Z,
# sum S over the rows falling inside that window, and keep the largest sum
# found in the group. The result `x` has one row per N with that maximum in S.
Z_TOLERANCE = 0.10
# Slack for binary floating point: 2.00 - 1.90 evaluates to slightly more than
# 0.10, which would otherwise drop rows sitting exactly on the tolerance edge.
FLOAT_SLACK = 1e-9

grouped_df = df.groupby('N')
Nvalues = []
Svalues = []
for key, gd in grouped_df:
    # Iterating the groupby already yields each group's frame, so no
    # get_group() lookup is needed.
    window_sums = [
        gd.loc[(gd['Z'] - val).abs() <= Z_TOLERANCE + FLOAT_SLACK, 'S'].sum()
        for val in gd['Z']
    ]
    # Record the key alongside its result so N and S cannot get out of step.
    Nvalues.append(key)
    Svalues.append(max(window_sums))

x = pd.DataFrame(data={'N': Nvalues,
                       'S': Svalues})
print(x)

首先,按值N对数据帧进行分组。现在,在每个组中,我通过查询数据帧来检查Z的值是否在公差级别内,并存储S的总和。最后获得S的总和的最大值。