如何以更Python的方式计算Pandas DataFrame中的日志返回

时间:2019-02-16 21:58:11

标签: pandas natural-logarithm

您能否帮助我优化代码,因为在65x2500数据帧上运行需要花费很多时间?初始数据框的单元格为空,并且并非所有列都有价格信息。所有包含价格信息的列均以“ PX_LAST”开头(仅以“ P”开头的列)为公司名称。如果日期之间有空白,没有价格信息,我正在计算对有价格的价格的更改,并跳过空白点。您能否帮我改进以下代码:

import numpy as np
import pandas as pd
#First non-NaN index value
def start_integer(column, dataframe):
    start = dataframe[column].first_valid_index()
    start_integer = None
    if start != None:
        start_integer = dataframe.index.get_loc(start)
    return start_integer

#Last non-NaN index value
def end_integer(column, dataframe):
    end = dataframe[column].last_valid_index()
    if end!= None:
        end_integer = dataframe.index.get_loc(end)
    return end_integer

def get_log_returns(dataframe):
    columns = dataframe.columns
    log_returns = dataframe.drop(columns, axis = 1) 

    column_number = - 1
    original_column_number = - 1    

    for col in dataframe.columns:
        dataframe[col] = dataframe[col].astype("float64")
        original_column_number += 1
        if col[:1] == "P":
            column_name = col[8:]
            column_number += 1
            column_length = len(dataframe[col])
            log_returns[column_name] = None
            row_number = -1
            for price in dataframe[col]:
                row_number += 1
                next_row = row_number + 1
                while column_length - 2 > next_row and np.isnan(price) == False and np.isnan(dataframe.iloc[next_row, original_column_number]) == True:
                    next_row += 1
                if row_number != end_integer(col, dataframe)  and np.isnan(price) == False:
                    log_returns.iloc[row_number, column_number] = np.log(price/dataframe.iloc[next_row, original_column_number]).round(5)
                elif np.isnan(price) == True and row_number >= start_integer(col, dataframe) and row_number < end_integer(col, dataframe):
                    log_returns.iloc[row_number, column_number] = 0
                else:
                    log_returns.iloc[row_number, column_number] = np.nan
    log_returns['mean'] = log_returns.mean(axis = 1)
    return log_returns

0 个答案:

没有答案