根据列名在 pandas 数据框的 lambda 表达式中使用 if else 语句

时间:2018-11-06 15:16:18

标签: python python-3.x pandas if-statement lambda

我有一个数据框df,如下所示:

import pandas as pd
df = pd.DataFrame({'a':[78.78, 77.26], 'b':[94.47,94.06], 'c':[0.72, 0.71], 'd':[0.19, 0.29]})

对于列a、b、c,我想提取最小值(放入列表中),而对于列d,我想获取最大值,即:

[77.26, 94.06, 0.71, 0.29]

我主要是想通过lambda表达式来做到这一点

例如,要获取所有最小值,我可以:

df.apply(lambda x:x.min(), axis = 0)

我想到了类似的东西(当然这是行不通的):

df_final.apply(lambda x:x.max() if x =='d' else x.min(), axis = 0)

我发现this question做的事情类似,不过它的条件语句基于每列的值,而我希望我的if else语句基于列名。问题在于x(即被迭代的对象)并不是列名。我该如何在lambda函数中使用基于列名的if else条件?

5 个答案:

答案 0 :(得分:6)

使用numpy.where

a = np.where(df.columns == 'd', df.max(), df.min()).tolist()
print (a)
[77.26, 94.06, 0.71, 0.29]

答案 1 :(得分:3)

通常,请尽量不要使用apply,因为它不会执行矢量化操作(即速度较慢)。

在这里,您只需选择所需的列,然后把得到的两个列表拼接起来即可:

min_cols = ['a', 'b', 'c']
max_cols = ['d']

>>> df[min_cols].min().tolist() + df[max_cols].max().tolist()
[77.26, 94.06, 0.71, 0.29]

答案 2 :(得分:2)

在字典中使用

//
//  PlaceDetailVC.swift
//  Memorable Places
//

import UIKit
import CoreLocation
import RealmSwift

/// Shows the name of `selectedPlace` and a table listing the places of
/// `selectedTrip` (sorted by name), each with its distance in miles from
/// `selectedPlace`.
class PlaceDetailVC: UIViewController, UITableViewDelegate, UITableViewDataSource {

    /// Divisor used to convert `CLLocation` distances (meters) into miles.
    private static let metersPerMile = 1609.344

    /// Reuse identifier under which the `PlaceDetailCell` nib is registered.
    private static let cellReuseIdentifier = "customPlaceDetailCell"

    @IBOutlet weak var address: UILabel!
    @IBOutlet weak var placesTable: UITableView!

    /// Place whose name is shown in `address` and from which distances are measured.
    var selectedPlace: Place = Place()
    /// Trip whose places populate the table.
    var selectedTrip: Trip = Trip()
    /// Formatted distances ("%.2f" miles), index-aligned with `places`.
    var distances = [String]()
    /// Places of `selectedTrip`, sorted by name; `nil` until `load()` has run.
    var places: Results<Place>?

    override func viewDidLoad() {
        super.viewDidLoad()

        address.text = selectedPlace.name

        // Register the xib file backing the table's cells.
        placesTable.register(UINib(nibName: "PlaceDetailCell", bundle: nil),
                             forCellReuseIdentifier: PlaceDetailVC.cellReuseIdentifier)
    }

    override func viewDidAppear(_ animated: Bool) {
        super.viewDidAppear(animated)
        load()
    }

    // ---------------------------------------------------------------------------------------------------------
    // MARK: - CRUD functions

    /// Read: fetches the trip's places sorted by name, rebuilds `distances`
    /// to match them, then reloads the table.
    ///
    /// `distances` is replaced rather than appended to, so calling this on
    /// every appearance cannot leave stale or duplicated entries, and the
    /// table is only reloaded once both arrays agree.
    func load() {
        let sortedPlaces = selectedTrip.places.sorted(byKeyPath: "name", ascending: true)
        places = sortedPlaces
        distances = formattedDistances(to: sortedPlaces)
        placesTable.reloadData()
    }

    /// Returns the distance from `selectedPlace` to each element of `places`,
    /// in miles, formatted with two decimals, in the same order as `places`.
    private func formattedDistances(to places: Results<Place>) -> [String] {
        let origin = CLLocation(latitude: Double(selectedPlace.lat),
                                longitude: Double(selectedPlace.lon))
        return places.map { place -> String in
            let location = CLLocation(latitude: Double(place.lat),
                                      longitude: Double(place.lon))
            // `distance(from:)` reports meters.
            let miles = location.distance(from: origin) / PlaceDetailVC.metersPerMile
            return String(format: "%.2f", miles)
        }
    }

    // ---------------------------------------------------------------------------------------------------------
    // MARK: - Table View Datasource

    func numberOfSections(in tableView: UITableView) -> Int {
        return 1
    }

    func tableView(_ tableView: UITableView, numberOfRowsInSection section: Int) -> Int {
        return places?.count ?? 0
    }

    func tableView(_ tableView: UITableView, heightForRowAt indexPath: IndexPath) -> CGFloat {
        return 70
    }

    func tableView(_ tableView: UITableView, cellForRowAt indexPath: IndexPath) -> UITableViewCell {
        let cell = tableView.dequeueReusableCell(withIdentifier: PlaceDetailVC.cellReuseIdentifier,
                                                 for: indexPath)

        // Hand back the unconfigured cell instead of crashing when the cell
        // type is unexpected or the row has no matching place/distance.
        // NOTE(review): the `name != nil` check is kept from the original;
        // whether `Place.name` is optional is not visible here — confirm.
        guard let placeCell = cell as? CustomPlaceDetailCell,
              let places = places,
              selectedPlace.name != nil,
              indexPath.row < places.count,
              indexPath.row < distances.count else {
            return cell
        }

        placeCell.address.text = places[indexPath.row]["name"] as? String
        placeCell.distance.text = distances[indexPath.row]
        return placeCell
    }

    // ---------------------------------------------------------------------------------------------------------
    // MARK: - Table View Delegate

    func tableView(_ tableView: UITableView, canEditRowAt indexPath: IndexPath) -> Bool {
        return true
    }

    func tableView(_ tableView: UITableView, didSelectRowAt indexPath: IndexPath) {
        // `activePlace` is not declared in this class; presumably a
        // module-level variable defined elsewhere — confirm.
        activePlace = indexPath.row
    }
}

agg

输出:

df.agg({'a':'min','b':'min','c':'min','d':'max'}).tolist()

答案 3 :(得分:1)

您可以使用Series的name属性:

df.apply(lambda x: x.max() if x.name == 'd' else x.min())
#a    77.26
#b    94.06
#c     0.71
#d     0.29
#dtype: float64

以下是粗略的计时结果,供您参考(假设您的列数不多):

小数据框:

df = pd.DataFrame({'a':[78.78, 77.26], 'b':[94.47,94.06], 'c':[0.72, 0.71], 'd':[0.19, 0.29]})
​    
%timeit df.apply(lambda x: x.max() if x.name == 'd' else x.min()).tolist()
# 770 µs ± 9.88 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)

%timeit pd.np.where(df.columns == 'd', df.max(), df.min()).tolist()
# 268 µs ± 7.93 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)

%timeit df.agg({'a':'min','b':'min','c':'min','d':'max'}).tolist()
# 814 µs ± 22.1 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)

%timeit df[min_cols].min().tolist() + df[max_cols].max().tolist()
# 1.02 ms ± 11.7 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)

%timeit df.describe().loc['min','a':'c'].tolist()+df.describe().loc['max',['d']].tolist()
# 18.7 ms ± 317 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)

大数据框:

df = pd.DataFrame({'a':[78.78, 77.26], 'b':[94.47,94.06], 'c':[0.72, 0.71], 'd':[0.19, 0.29]})
​
df = pd.concat([df] * 10000)

%timeit df.apply(lambda x: x.max() if x.name == 'd' else x.min()).tolist()
# 1.03 ms ± 16.1 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)

%timeit pd.np.where(df.columns == 'd', df.max(), df.min()).tolist()
#1.81 ms ± 27.5 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)

%timeit df.agg({'a':'min','b':'min','c':'min','d':'max'}).tolist()
# 1.07 ms ± 13.6 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)

%timeit df[min_cols].min().tolist() + df[max_cols].max().tolist()
# 1.9 ms ± 30.4 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)

%timeit df.describe().loc['min','a':'c'].tolist()+df.describe().loc['max',['d']].tolist()
# 25.7 ms ± 752 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)

答案 4 :(得分:0)

使用describe

df.describe().loc['min','a':'c'].tolist()+df.describe().loc['max',['d']].tolist()
Out[276]: [77.26, 94.06, 0.71, 0.29]