我有一个非常大的数据框,其中保存了一年中每一秒的电池放电计划。
基本系列事件是
数据框看起来像这样......(原谅我糟糕的格式化)
Index | Freq | Case | Battery OP | Power Required | Battery Energy | SOC | Response timer | Charge Power |
01/01/2016 | 49.5862 | C | Discharging | 300.512 | 1500 | 99.85 | 3 | 0 |
01/01/2016 | 49.5862 | C | Charging | 0 | 1500 | 99.85 | 3 | 1500 |
我目前正在使用for循环和一些if / elif语句来遍历每一行,检查电池是否需要充电。
我觉得它效率很低。我可能会耗尽内存,或者需要几天才能完成。
我让它在周末运行,但仍然没有完成
我确定有更好的方法可以做到这一点,但我不知道该怎么做。问题是计算必须按顺序进行:每一秒的充电状态(SOC%)或电池能量,都要根据电池的输入或输出功率以及上一秒的SOC%/能量来计算。
此处可重现的代码(试图尽可能地减少它)
import numpy as np
import pandas as pd
# Battery ratings and controller settings.
# NOTE(review): the names suggest watts / watt-hours and one row per second
# (power is divided by 60*60 below) -- confirm against the real data set.
Battery_W = 1000
Battery_Wh = 1000 / 3
starting_SOC = 0.75   # fraction of Battery_Wh the charger tops the battery up to
charge_delay = 5      # rows between the end of a power request and charging
charging = False

year_test = pd.DataFrame(
    data=[
        50.00, 50.00, 49.99, 49.98, 49.87, 49.76, 49.65, 49.25, 50.00, 50.00,
        50.00, 50.00, 50.00, 50.00, 49.99, 49.78, 49.67, 49.46, 49.25, 49.25,
        50.00, 50.00, 50.00, 49.95, 49.65, 49.45, 49.65, 49.55, 50.00, 50.00,
        50.00, 50.00, 50.00, 50.00, 50.00, 49.95, 49.65, 49.45, 49.65, 49.55,
        49.99, 49.68, 50.00, 50.00, 50.00, 50.00, 50.00, 50.00, 50.00, 50.00,
    ],
    index=range(0, 50),
    columns=['Freq'],
)

# Frequency at or below 49.75 is case 'C'; anything above it is case 'B'.
case_conditions = [
    (year_test['Freq'] <= 49.75),
    (year_test['Freq'] > 49.75),
]
choices = ['C', 'B']
year_test['Case'] = np.select(case_conditions, choices, default='No Case')

# Battery operation mode
op_conditions = [
    (year_test['Case'] == 'C'),
    (year_test['Case'] == 'B'),
]
op_choices = ['Discharging', 'Idle']
year_test['Battery OP'] = np.select(op_conditions, op_choices, default='No OP Mode')

# Power output required
# NOTE(review): as written, case 'B' (Idle) requests 1000 and case 'C'
# (Discharging) requests 0.  That looks inverted, but it is kept unchanged
# because the intended mapping cannot be confirmed from here.
power_conditions = [
    (year_test['Case'] == 'B'),
    (year_test['Case'] == 'C'),
]
power_choices = [1000, 0]
year_test['Power Required'] = np.select(power_conditions, power_choices, default=0)

# Placeholders created now so the column order stays
# Freq, Case, Battery OP, Power Required, Battery Energy, SOC%, ...
year_test['Battery Energy'] = 0.0
year_test['SOC%'] = 0.0

# Response timer: running count of 'Discharging' rows, zero on 'Idle' rows.
# A single .loc assignment is used instead of chained indexing, which may
# write to a temporary copy and leave the frame untouched.
year_test['Response timer'] = year_test.groupby('Battery OP').cumcount()
year_test.loc[year_test['Battery OP'] == 'Idle', 'Response timer'] = 0
year_test['Charge Power'] = 0.00


def _simulate_battery(power_required, battery_w, battery_wh, target_wh,
                      delay, charging=False):
    """Run the row-by-row charge controller on plain Python lists.

    Every row depends on the energy of the previous row, so the calculation
    cannot be expressed as one vectorised operation.  Working on lists and
    writing the results back to the frame once avoids a pandas cell lookup
    for every read and write inside the loop.

    Args:
        power_required: Requested output power for each row.
        battery_w: Charge power applied while charging.
        battery_wh: Capacity; the stored energy is capped at this value.
        target_wh: Energy the first ``delay`` rows start with and the level
            at which charging stops.
        delay: Number of seeded rows, and the offset between the end of a
            power request and the start of charging.
        charging: Initial state of the charging flag.

    Returns:
        Tuple ``(energy, charge_power, is_charging, charging)``: three
        per-row lists followed by the final state of the charging flag.
    """
    n_rows = len(power_required)
    energy = [0.0] * n_rows
    charge_power = [0.0] * n_rows
    is_charging = [False] * n_rows
    for k in range(min(delay, n_rows)):
        energy[k] = target_wh

    for j in range(delay, n_rows):
        previous = energy[j - 1]
        request_just_ended = (power_required[j - delay] > 0
                              and power_required[j - delay + 1] == 0)
        if request_just_ended:
            # Charge at max rate, ``delay`` rows after the request dropped to 0.
            charging = True
            charge_power[j] = float(battery_w)
            is_charging[j] = True
        elif charging and previous < target_wh:
            # Still below the target level: keep charging.
            charge_power[j] = float(battery_w)
            is_charging[j] = True
        elif previous >= target_wh or not charging:
            charging = False

        # New battery energy; power is converted to energy per row by /60/60.
        level = previous - power_required[j] / 60 / 60 + charge_power[j] / 60 / 60
        if level > battery_wh:
            level = battery_wh
        energy[j] = level

    return energy, charge_power, is_charging, charging


_energy, _charge_power, _is_charging, charging = _simulate_battery(
    year_test['Power Required'].tolist(),
    Battery_W,
    Battery_Wh,
    Battery_Wh * starting_SOC,
    charge_delay,
    charging,
)
year_test['Battery Energy'] = _energy
year_test['Charge Power'] = _charge_power
year_test.loc[np.array(_is_charging, dtype=bool), 'Battery OP'] = 'Charging'

# Battery SOC% relative to full capacity
year_test['SOC%'] = year_test['Battery Energy'] / Battery_Wh * 100
答案 0 :(得分:0)
执行此操作的最佳方法是使用pandas数据框的apply方法,因为你遇到了内存不足的问题。这种方法称为矢量化。
示例如下 Public Sub exceljson()
' Requests the price JSON from the CryptoCompare URL below (fsym=USD,
' tsyms=BTC) and writes each item of the parsed result into column 2 of the
' first sheet, one row per item, starting at row 2.
Dim https As Object, Json As Object, i As Integer
Dim Item As Variant
' Create the HTTP request object by ProgID (late binding).
Set https = CreateObject("MSXML2.XMLHTTP")
' The third argument (False) asks for a synchronous request, so Send returns
' only once the response is available.
https.Open "GET", "https://min-api.cryptocompare.com/data/price?fsym=USD&tsyms=BTC", False
https.Send
' JsonConverter is not defined in this snippet -- presumably the VBA-JSON
' module; confirm it is imported into the workbook.
Set Json = JsonConverter.ParseJson(https.responseText)
' i is the next output row.
i = 2
For Each Item In Json.Items
Sheets(1).Cells(i, 2).Value = Item
i = i + 1
Next
MsgBox ("complete")
End Sub
您可以查看文档以获取更多详细信息:http://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.apply.html#pandas.DataFrame.apply
答案 1 :(得分:0)
这就是我可能重写代码的方式。我只是将最初的7列缩减为字典,然后使用pd.DataFrame()将它们转换为正确的DataFrame。然后,我只需在迭代构建好的DataFrame时应用if...elif语句。
import numpy as np
import pandas as pd
# Battery ratings and controller settings.
# NOTE(review): the names suggest watts / watt-hours and one row per second
# (power is divided by 60*60 below) -- confirm against the real data set.
Battery_W = 1000
Battery_Wh = 1000 / 3
starting_SOC = 0.75   # fraction of Battery_Wh the charger tops the battery up to
charge_delay = 5      # rows between the end of a power request and charging
charging = False

# Test frequencies, one per row.
data = (
    50.00, 50.00, 49.99, 49.98, 49.87, 49.76, 49.65, 49.25, 50.00, 50.00,
    50.00, 50.00, 50.00, 50.00, 49.99, 49.78, 49.67, 49.46, 49.25, 49.25,
    50.00, 50.00, 50.00, 49.95, 49.65, 49.45, 49.65, 49.55, 50.00, 50.00,
    50.00, 50.00, 50.00, 50.00, 50.00, 49.95, 49.65, 49.45, 49.65, 49.55,
    49.99, 49.68, 50.00, 50.00, 50.00, 50.00, 50.00, 50.00, 50.00, 50.00,
)

# Build one record per sample.  Case, operation mode and required power are
# derived together here instead of in separate passes over the frame.
# NOTE(review): case 'B' (Idle) requests 1000 and case 'C' (Discharging)
# requests 0.  That looks inverted, but it is kept unchanged because the
# intended mapping cannot be confirmed from here.
test = {}
for index, d in enumerate(data):
    if d <= 49.75:
        case, mode, power = 'C', 'Discharging', 0
    elif d > 49.75:
        case, mode, power = 'B', 'Idle', 1000
    else:
        # Neither comparison holds (e.g. NaN): no record is created.
        continue
    test[index] = {
        'Freq': d,
        'Case': case,
        'Battery_OP': mode,
        'Power_Required': power,
        'Battery_Energy': 0.0,
        'SOC': 0,
        'Charge_Power': 0.0,
    }

# Convert the records into a DataFrame once.
year_test = pd.DataFrame(list(test.values()))

# Response timer: running count of 'Discharging' rows, zero on 'Idle' rows.
# A single .loc assignment is used instead of chained indexing, which may
# write to a temporary copy and leave the frame untouched.
year_test['Response_timer'] = year_test.groupby('Battery_OP').cumcount()
year_test.loc[year_test['Battery_OP'] == 'Idle', 'Response_timer'] = 0


def _run_charge_controller(power_required, battery_w, battery_wh, target_wh,
                           delay, charging=False):
    """Step through the rows in order and return the battery state per row.

    Each row needs the energy of the row before it, so the work is done in a
    plain Python loop over lists; per-cell ``.iloc`` reads and writes on the
    frame are what make a row-by-row DataFrame loop slow.

    Args:
        power_required: Requested output power for each row.
        battery_w: Charge power applied while charging.
        battery_wh: Capacity; the stored energy is capped at this value.
        target_wh: Energy the first ``delay`` rows start with and the level
            at which charging stops.
        delay: Number of seeded rows, and the offset between the end of a
            power request and the start of charging.
        charging: Initial state of the charging flag.

    Returns:
        Tuple ``(energy, charge_power, is_charging, charging)``: three
        per-row lists followed by the final state of the charging flag.
    """
    n_rows = len(power_required)
    energy = [0.0] * n_rows
    charge_power = [0.0] * n_rows
    is_charging = [False] * n_rows
    for k in range(min(delay, n_rows)):
        energy[k] = target_wh

    for j in range(delay, n_rows):
        previous = energy[j - 1]
        request_just_ended = (power_required[j - delay] > 0
                              and power_required[j - delay + 1] == 0)
        if request_just_ended:
            # Charge at max rate, ``delay`` rows after the request dropped to 0.
            charging = True
            charge_power[j] = float(battery_w)
            is_charging[j] = True
        elif charging and previous < target_wh:
            # Still below the target level: keep charging.
            charge_power[j] = float(battery_w)
            is_charging[j] = True
        elif previous >= target_wh or not charging:
            charging = False

        # New battery energy; power is converted to energy per row by /60/60.
        level = previous - power_required[j] / 60 / 60 + charge_power[j] / 60 / 60
        if level > battery_wh:
            level = battery_wh
        energy[j] = level

    return energy, charge_power, is_charging, charging


_energy, _charge_power, _is_charging, charging = _run_charge_controller(
    year_test['Power_Required'].tolist(),
    Battery_W,
    Battery_Wh,
    Battery_Wh * starting_SOC,
    charge_delay,
    charging,
)

# Write the results back by column name, one assignment per column.
year_test['Battery_Energy'] = _energy
year_test['Charge_Power'] = _charge_power
year_test.loc[np.array(_is_charging, dtype=bool), 'Battery_OP'] = 'Charging'

# Battery SOC relative to full capacity, in percent.
year_test['SOC'] = year_test['Battery_Energy'] / Battery_Wh * 100