我有一个数据框df
,如下所示:
import pandas as pd
df = pd.DataFrame({'a':[78.78, 77.26], 'b':[94.47,94.06], 'c':[0.72, 0.71], 'd':[0.19, 0.29]})
对于列a
,b
和c
,我想提取最小值(到列表中),而对于列d
,我想获取最大值,即:
[77.26, 94.06, 0.71, 0.29]
我主要是想通过lambda表达式来做到这一点
例如,要获取所有最小值,我可以:
df.apply(lambda x:x.min(), axis = 0)
我想到了类似的东西(当然这是行不通的):
df_final.apply(lambda x:x.max() if x =='d' else x.min(), axis = 0)
我发现this question与此类似,只是其中的条件语句基于每列的值,而我希望我的if else语句基于列名。问题在于x
(即被迭代的对象)并不是列名。如何在lambda函数中应用基于列名的if else条件?
答案 0 :(得分:6)
使用numpy.where
:
a = np.where(df.columns == 'd', df.max(), df.min()).tolist()
print (a)
[77.26, 94.06, 0.71, 0.29]
答案 1 :(得分:3)
通常,请尽量不要使用apply
,因为它不会执行矢量化操作(即速度较慢)。
在这里,您只需选择所需的列,分别聚合后再把结果列表拼接起来即可:
min_cols = ['a', 'b', 'c']
max_cols = ['d']
>>> df[min_cols].min().tolist() + df[max_cols].max().tolist()
[77.26, 94.06, 0.71, 0.29]
答案 2 :(得分:2)
在字典中使用//
// PlaceDetailVC.swift
// Memorable Places
//
//
import UIKit
import CoreLocation
import RealmSwift
/// Detail screen for one place of a trip.
///
/// Shows the name of `selectedPlace` and a table listing every place of
/// `selectedTrip` (sorted by name) together with its distance, in miles,
/// from `selectedPlace`.
class PlaceDetailVC: UIViewController, UITableViewDelegate, UITableViewDataSource {

    // MARK: - Outlets

    @IBOutlet weak var address: UILabel!
    @IBOutlet weak var placesTable: UITableView!

    // MARK: - State

    /// The place whose details are shown; distances are measured from it.
    var selectedPlace : Place = Place()
    /// The trip whose places populate the table.
    var selectedTrip : Trip = Trip()
    /// Formatted distances ("%.2f" miles), index-aligned with `places`.
    var distances = [ String ]()
    /// Places of `selectedTrip`, sorted by name; `nil` until `load()` has run.
    var places : Results<Place>?

    /// Conversion factor used to turn CoreLocation's meters into miles.
    private static let metersPerMile = 1609.344

    // MARK: - View lifecycle

    override func viewDidLoad() {
        super.viewDidLoad()
        address.text = selectedPlace.name
        // Register the xib-backed cell used by `placesTable`.
        placesTable.register(UINib(nibName: "PlaceDetailCell", bundle: nil),
                             forCellReuseIdentifier: "customPlaceDetailCell")
    }

    override func viewDidAppear(_ animated: Bool) {
        // UIKit requires overrides of viewDidAppear to call super.
        super.viewDidAppear(animated)
        load()
    }

    // ---------------------------------------------------------------------------------------------------------
    // MARK: - CRUD functions

    /// Reads the trip's places (sorted by name), rebuilds `distances` so that it
    /// always matches `places` one-to-one, and then reloads the table.
    func load() {
        places = selectedTrip.places.sorted(byKeyPath: "name", ascending: true)
        // Rebuild from scratch: appending on every appearance would leave stale
        // and duplicated entries that no longer line up with `places`.
        distances = makeDistances()
        placesTable.reloadData()
    }

    /// Computes the distance in miles from `selectedPlace` to each entry of
    /// `places`, formatted with two decimals, in the same order as `places`.
    private func makeDistances() -> [String] {
        guard let places = places else { return [] }

        // The origin is loop-invariant, so build it once.
        let origin = CLLocation(latitude: Double(selectedPlace.lat),
                                longitude: Double(selectedPlace.lon))

        var result = [String]()
        result.reserveCapacity(places.count)
        for place in places {
            let location = CLLocation(latitude: Double(place.lat),
                                      longitude: Double(place.lon))
            // `distance(from:)` returns meters.
            let miles = location.distance(from: origin) / PlaceDetailVC.metersPerMile
            result.append(String(format: "%.2f", miles))
        }
        return result
    }

    // ---------------------------------------------------------------------------------------------------------
    // MARK: - Table View Datasource

    func numberOfSections(in tableView: UITableView) -> Int {
        return 1
    }

    func tableView(_ tableView: UITableView, numberOfRowsInSection section: Int) -> Int {
        return places?.count ?? 0
    }

    func tableView(_ tableView: UITableView, heightForRowAt indexPath: IndexPath) -> CGFloat {
        return 70
    }

    func tableView(_ tableView: UITableView, cellForRowAt indexPath: IndexPath) -> UITableViewCell {
        guard let cell = tableView.dequeueReusableCell(withIdentifier: "customPlaceDetailCell", for: indexPath)
            as? CustomPlaceDetailCell else {
            // A wrong cell class here is a nib/registration mistake, not a runtime condition.
            fatalError("Cell registered as \"customPlaceDetailCell\" is not a CustomPlaceDetailCell")
        }

        let row = indexPath.row
        // Leave the cell untouched rather than crash if the data is missing or
        // the row is out of range.
        guard let places = places, row < places.count else { return cell }

        // Kept from the original: cells are only filled in when the selected place has a name.
        if selectedPlace.name != nil {
            cell.address.text = places[row].name
            // `distances` is rebuilt together with `places`; the bounds check is purely defensive.
            cell.distance.text = row < distances.count ? distances[row] : ""
        }
        return cell
    }

    // ---------------------------------------------------------------------------------------------------------
    // MARK: - Table View Delegate

    func tableView(_ tableView: UITableView, canEditRowAt indexPath: IndexPath) -> Bool {
        return true
    }

    func tableView(_ tableView: UITableView, didSelectRowAt indexPath: IndexPath) {
        // `activePlace` is declared outside this class; it records the tapped row index.
        activePlace = indexPath.row
    }
}
使用agg
并传入一个以列名为键的字典:
df.agg({'a':'min','b':'min','c':'min','d':'max'}).tolist()
输出:
[77.26, 94.06, 0.71, 0.29]
答案 3 :(得分:1)
您可以在系列上使用name
属性:
df.apply(lambda x: x.max() if x.name == 'd' else x.min())
#a 77.26
#b 94.06
#c 0.71
#d 0.29
#dtype: float64
以下是粗略的计时结果,供您参考(假设列数不多):
小数据框:
df = pd.DataFrame({'a':[78.78, 77.26], 'b':[94.47,94.06], 'c':[0.72, 0.71], 'd':[0.19, 0.29]})
%timeit df.apply(lambda x: x.max() if x.name == 'd' else x.min()).tolist()
# 770 µs ± 9.88 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
%timeit pd.np.where(df.columns == 'd', df.max(), df.min()).tolist()
# 268 µs ± 7.93 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
%timeit df.agg({'a':'min','b':'min','c':'min','d':'max'}).tolist()
# 814 µs ± 22.1 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
%timeit df[min_cols].min().tolist() + df[max_cols].max().tolist()
# 1.02 ms ± 11.7 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
%timeit df.describe().loc['min','a':'c'].tolist()+df.describe().loc['max',['d']].tolist()
# 18.7 ms ± 317 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
大数据框:
df = pd.DataFrame({'a':[78.78, 77.26], 'b':[94.47,94.06], 'c':[0.72, 0.71], 'd':[0.19, 0.29]})
df = pd.concat([df] * 10000)
%timeit df.apply(lambda x: x.max() if x.name == 'd' else x.min()).tolist()
# 1.03 ms ± 16.1 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
%timeit pd.np.where(df.columns == 'd', df.max(), df.min()).tolist()
#1.81 ms ± 27.5 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
%timeit df.agg({'a':'min','b':'min','c':'min','d':'max'}).tolist()
# 1.07 ms ± 13.6 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
%timeit df[min_cols].min().tolist() + df[max_cols].max().tolist()
# 1.9 ms ± 30.4 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
%timeit df.describe().loc['min','a':'c'].tolist()+df.describe().loc['max',['d']].tolist()
# 25.7 ms ± 752 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)
答案 4 :(得分:0)
使用describe
df.describe().loc['min','a':'c'].tolist()+df.describe().loc['max',['d']].tolist()
Out[276]: [77.26, 94.06, 0.71, 0.29]