我有一份一分钟频率的数据:
<TouchableHighlight underlayColor='none' />
我想删除那些分钟差大于日差的观测值(这里的“日”指该分钟 K 线所属的日期)。我想把这条规则应用到除成交量(Volume)以外的所有列。代码的开头:
# Fetch the last seven days of one-minute MSFT bars through yfinance.
import yfinance as yf

data = yf.download(tickers="MSFT", interval="1m", period="7d")

# Print the most recent rows as a quick sanity check of the download.
print(data.tail())
答案 0 :(得分:0)
# Change of every column between consecutive one-minute bars. reset_index()
# turns the timestamp index into a regular 'Datetime' column so the surviving
# timestamps can be used to select rows from `data` at the end.
# NOTE(review): assumes the index of `data` is named 'Datetime' -- confirm.
minute_diff = data.diff().reset_index()

# Typical size of a day-over-day move, per column:
#   * dropna(how='all') removes the empty calendar days (weekends, holidays)
#     that resample('D') inserts; otherwise the change across such a gap is
#     NaN and silently drops out of the median.
#   * abs() makes the threshold a magnitude; a signed median can be negative,
#     which would reject almost every bar.
daily_diff = data.resample('D').last().dropna(how='all').diff().abs().median()

# The rule applies to every column except the timestamp and Volume.
cols = [c for c in minute_diff.columns if c not in ('Datetime', 'Volume')]

# A bar is kept when, for every checked column, the magnitude of its minute
# change does not exceed the daily threshold. NaN changes (e.g. the very first
# bar, which has no predecessor) are kept rather than rejected.
within_limit = minute_diff[cols].abs().le(daily_diff[cols], axis='columns')
keep = (within_limit | minute_diff[cols].isna()).all(axis='columns')

minute_diff = minute_diff[keep]
data = data.loc[minute_diff['Datetime']]
答案 1 :(得分:0)
import pandas as pd
# Import data
import yfinance as yf

data = yf.download(tickers="MSFT", period="7d", interval="1m")
data_minute = data.copy()

# Calendar day each bar belongs to. normalize() sets the time to midnight and
# keeps the index's time zone, if it has one. Casting a tz-aware index with
# astype('datetime64[ns]') raises TypeError on pandas 2.x.
# NOTE(review): yfinance intraday indexes are presumably tz-aware -- confirm.
data_minute['Date'] = data_minute.index.normalize()

# Create new column for difference of current close minus previous close
data_minute['Minute Close Difference'] = data_minute['Close'].diff()

# Convert minute data to daily data
data_daily = data_minute.resample('D').agg({
    'Open': 'first',
    'High': 'max',
    'Low': 'min',
    'Close': 'last',
    'Adj Close': 'last',
    'Volume': 'sum',
})

# resample('D') also emits rows for calendar days without any bars (weekends,
# holidays); their Close is NaN. Drop them so the daily difference below is
# taken between consecutive trading days. Otherwise the first session after
# a gap gets a NaN difference and every one of its bars is filtered out.
data_daily = data_daily.dropna(subset=['Close'])

# Key the daily frame by the same midnight timestamps used in
# data_minute['Date'] so the two frames can be joined on the calendar day.
data_daily.index = data_daily.index.normalize()
data_daily.index.name = 'Date'

# Create new column for difference of current close minus previous close
data_daily['Daily Close Difference'] = data_daily['Close'].diff()

# Attach each day's close-to-close change to all minute bars of that day.
data_minute = pd.merge(
    data_minute,
    data_daily['Daily Close Difference'],
    how='left',
    left_on='Date',
    right_index=True,
)

# Keep only the bars whose minute move is no larger in magnitude than the
# daily move. Comparisons against NaN are False, so the first bar and every
# bar of the first trading day (which has no previous close) are dropped.
data_minute = data_minute[
    data_minute['Minute Close Difference'].abs()
    <= data_minute['Daily Close Difference'].abs()
]
data_minute
答案 2 :(得分:0)
我找到了解决方法:
# Day-over-day change of each trading day's last bar. dropna() removes the
# empty calendar days that resample('D') inserts, so every difference is taken
# between consecutive trading days.
# NOTE(review): the factor 25 scales the daily move into the rejection
# threshold; it looks like an empirical choice by the author -- confirm.
daily_diff = data.resample('D').last().dropna().diff() * 25
daily_diff['diff_date'] = daily_diff.index.strftime('%Y-%m-%d')

# Change of every column between consecutive one-minute bars, tagged with the
# calendar day so it can be matched against that day's threshold.
data_test = data.diff()
data_test['diff_date'] = data_test.index.strftime('%Y-%m-%d')

# After the merge, '<col>_x' holds the minute change and '<col>_y' the scaled
# daily change. DataFrame.merge is used instead of pd.merge so the snippet does
# not depend on a pandas import made elsewhere.
data_test_diff = data_test.merge(daily_diff, on='diff_date')

# True where the minute move is smaller in magnitude than the threshold.
# Series.abs() replaces np.abs because numpy is never imported in this file.
# The column is 'Close' (capitalised), as in the frame yf.download returns.
indexer = data_test_diff['Close_x'].abs() < data_test_diff['Close_y'].abs()

data_test_final = data_test_diff.loc[indexer]
data_test_final['Close_x'].plot()

# The merged frame has a fresh integer index, so the mask is applied
# positionally via .values; this relies on the inner merge keeping the row
# order and row count of `data`.
data_final = data.loc[indexer.values, :]