在数据框python中复制行

时间:2018-10-22 14:00:47

标签: python pandas csv dataframe

大家下午好

我目前正在用python写一篇关于KMV模型的论文。我从代码here中获得了启发,以解决非线性方程。 Here是用于创建数据框的CSV文件的链接。这是我到目前为止的代码:

所需模块的导入

from datetime import datetime
import pandas as pd
import numpy as np
import scipy.optimize as sco
from scipy.stats import norm


df = pd.DataFrame()
df = pd.read_csv("AREX.csv", sep=';', engine = "python", decimal=',')

为模型运行准备文件的功能

def clean():   
#    df.rename(columns ={"Date": "Date"}, inplace = True)
#    df["Date"] = pd.to_datetime(df['Date'])
    df.set_index("Date", inplace = True)
    df['AREX.O']=df['AREX.O'].astype(float)
    df.drop(['Total Short Term debt'], axis =1, inplace = True)
    return df

def preparation():
    df['e']=df['AREX.O']*df['Share Outstanding']
    df['Short Term Debt']=df['Debt']-df['Total Long term Debt']
    df['f']=df['Short Term Debt']+df['Total Long term Debt']*0.5
    df['log_ret'] = np.log(df['AREX.O']) - np.log(df['AREX.O'].shift(1))
#    df['stdev']=df['log_ret'].rolling(252).std()*m.sqrt(252)
    return df

用于求解a和sigma_a的算法。

我只在这里尝试使代码适应我的数据框

def algo1():

#    formatting the vaules as required
    df["f"] = df["f"].astype(float)
    df["e"] = df["e"].astype(float)


#    #computating of key input variable for the model

    df['a'] = df['f'].add(df["e"])


    #defining a function for the black Scholes equation

    def bseqn(a, debug=False):
        d1 = (np.log(a/f) + (r + 0.5*sigma_a**2)*T)/(sigma_a*np.sqrt(T))
        d2 = d1 - sigma_a*np.sqrt(T)
        y1 = e - (a*norm.cdf(d1) - np.exp(-r*T)*f*norm.cdf(d2))

        if debug:
            print("d1 = {:.6f}".format(d1))
            print("d2 = {:.6f}".format(d2))
            print("Error = {:.6f}".format('y1'))

        return y1

    #Solving the model

    time_horizon=[1]
    timesteps = range(1, len(df))

    results = np.empty((df.shape[0],len(time_horizon)))

    #looping to solve for each row

    for i, years in enumerate(time_horizon):
        T = 1
        results[:,i] = df.loc[:,'a'] 
        for i_t, t in enumerate(timesteps):  
            a = results[t-10:t,i]
            ra =np.log(a/np.roll(a,1))
            sigma_a = np.nanstd(ra) #gives initial value of sigma_a

            if i_t == 0:
                subset_timesteps = range(t-1, t+1)
                print(subset_timesteps)
            else:
                subset_timesteps = [t]

            n_its = 0
            while n_its < 10:
                n_its += 1
                for t_sub in subset_timesteps:
                    r = df.iloc[t_sub]['r'] 
                    f = df.iloc[t_sub]['f']
                    e = df.iloc[t_sub]['e']
                    sol = sco.fsolve(bseqn, results[t_sub,i]) #if I replace newton with fsolve the code works properly
                    results[t_sub,i] = sol # stores the new values of a 

                # Update sigma_a based on new values of a
                last_sigma_a = sigma_a
                a = results[t-10:t,i]
                ra = np.log(a/np.roll(a,1))
                sigma_a = np.nanstd(ra) #new val of sigma
                diff = last_sigma_a - sigma_a
                if abs(diff) < 1e-3:
                    df.loc[t_sub,'sigma_a'] = sigma_a
                    break 
            else:

                pass
    return df

运行功能

def run():   
    clean()
    preparation()
    algo1()
    print(df)
    print(list(df))
#    main_df = df.to_csv("AREX_D.csv")

输出应将sigma_a的结果写在已创建的sigma_a列上,但要添加一行而不是1500行,而最终得到3000行,其中大多数是Nan值。我不明白代码在哪里要求...

我怀疑它来自以下几行:

   diff = last_sigma_a - sigma_a
                    if abs(diff) < 1e-3:
                        df.loc[t_sub,'sigma_a'] = sigma_a
                        break 

有人对发生的事情有任何见识吗?

这是输出的图片: The results of the sigma seemed to be moved and start from the last row instead of the samed rows as the other variables

非常感谢!

0 个答案:

没有答案