我正在尝试编写一段 Python 代码:对 SQL 表中的部分数据执行回归,然后把基于清理后数据得到的一些参数导出到一张新的 SQL 表中。我的代码如下:
import pandas as pd
import numpy as np
import pandas.io.sql as psql
import pypyodbc
from pandas.stats.api import ols
import statsmodels.api as sm
#from statsmodels.formula.api import ols
# Open an ODBC connection to SQL Server. The server, database, UID and PWD
# values in the connection string appear to be placeholders.
conn = pypyodbc.connect("DRIVER={SQL Server};SERVER=Server Add;DATABASE=Database;UID=UID;PWD=PWD")
# Load the two columns used below (net_rate, cohort) into a DataFrame.
Data1 = pd.read_sql('SELECT net_rate, cohort FROM an.dbo.SL_Stop', conn)
print Data1
# Build indicator (dummy) columns from 'cohort'; each new column name starts
# with the 'Cohort' prefix.
dummies = pd.get_dummies(Data1['cohort'],prefix ='Cohort') #Creating Dummies
# Keep only net_rate from the original frame and attach the dummy columns
# to it (join aligns on the index).
Data_With_Dummies = Data1[['net_rate']].join(dummies) #Merging Dummies
..........#数据清理过程..........
然后我对清理后的数据执行回归:
# Fit an ordinary-least-squares model. NOTE(review): endog and exog are not
# defined in the visible code; presumably they come from the elided
# data-cleaning step above.
mod = sm.OLS(endog, exog)
results = mod.fit()
print results.summary()
print "\n"
print ('Paramters:', results.params )
# Put the fitted coefficients into a single-column frame named 'values', then
# transpose so there is one row ('values') and one column per regressor.
Data_Params=pd.DataFrame(results.params, columns =['values'])
Data_Params = Data_Params.T
# Suffix the coefficient columns with '_Coef'. rename() only touches the
# labels listed here; any other column keeps its name.
Data_Params = Data_Params.rename(columns={'const':'const_Coef',
'Cohort_2' : 'Cohort_2_Coef',
'Cohort_3':'Cohort_3_Coef'})
# Same reshaping for the p-values, with a '_Pvalue' suffix.
Data_Pvalues = pd.DataFrame(results.pvalues, columns = ['values'])
Data_Pvalues= Data_Pvalues.T
Data_Pvalues = Data_Pvalues.rename(columns={'const':'const_Pvalue',
'Cohort_2' : 'Cohort_2_Pvalue',
'Cohort_3':'Cohort_3_Pvalue'})
# Place coefficients and p-values side by side in one row.
Data_Concatenate_Coeff_Pvalues = pd.concat([Data_Params,Data_Pvalues],axis = 1)
# NOTE(review): the result of this call is not assigned to anything, so it
# has no effect on the code that follows.
pd.DataFrame(Data_Concatenate_Coeff_Pvalues,index = ["Coeefi","Pvalue"])
# Each assignment below selects a whole column of a one-row DataFrame, so the
# variables hold pandas Series objects rather than plain numbers.
const_Coef = Data_Params['const_Coef']
Cohort_2_Coef = Data_Params['Cohort_2_Coef']
Cohort_3_Coef = Data_Params['Cohort_3_Coef']
const_Pvalue = Data_Pvalues['const_Pvalue']
Cohort_2_Pvalue = Data_Pvalues['Cohort_2_Pvalue']
Cohort_3_Pvalue = Data_Pvalues['Cohort_3_Pvalue']
# INSERT statement for _nrr_cohorts, written as one literal string.
# NOTE(review): everything between the triple quotes is sent to the server
# verbatim. The Data_Params[...] / Data_Pvalues[...] expressions in the VALUES
# clause are plain text here, not evaluated by Python, so no regression value
# reaches the database.
# NOTE(review): Data_Pvalues['Cohort_3_Pvalue' in the VALUES clause is missing
# its closing ']'.
# NOTE(review): the column list alternates beta / p_value per cohort, while the
# VALUES clause lists the three coefficients first and then the three
# p-values, so the two orders do not line up.
SQL_INSERT_QUERY = """
INSERT INTO _nrr_cohorts (
[report_month],
[beta_cohort_1],
[p_value_cohort_1],
[beta_cohort_2],
[p_value_cohort_2],
[beta_cohort_3],
[p_value_cohort_3],
[updated_datetime]
)
VALUES (
1,Data_Params['const_Coef'],Data_Params['Cohort_2_Coef'],Data_Params['Cohort_3_Coef'],
Data_Pvalues['const_Pvalue'],Data_Pvalues['Cohort_2_Pvalue'],Data_Pvalues['Cohort_3_Pvalue',3
)
"""
# Run the statement on a new cursor; .commit() is called on the value
# returned by execute().
db = conn.cursor()
db.execute(SQL_INSERT_QUERY).commit()
我想将回归参数导出到新的 SQL 表中,但插入代码需要硬编码的值。是否有办法将 DataFrame 或这些参数传递给新的 SQL 表?
答案 0 :(得分:0)
# Insert the fitted coefficients and p-values into _nrr_cohorts using a
# parameterized query, so the values are bound by the driver instead of being
# written into the SQL text.
import datetime  # needed for the timestamp below; not imported at file top

# Extract each cell of the one-row frames as a scalar. .iat is positional:
# row 0, columns 0..2 are expected to be const, Cohort_2, Cohort_3 in that
# order. float() hands the driver a built-in float rather than a NumPy scalar.
const_Coef = float(Data_Params.iat[0, 0])
Cohort_2_Coef = float(Data_Params.iat[0, 1])
Cohort_3_Coef = float(Data_Params.iat[0, 2])
const_Pvalue = float(Data_Pvalues.iat[0, 0])
Cohort_2_Pvalue = float(Data_Pvalues.iat[0, 1])
Cohort_3_Pvalue = float(Data_Pvalues.iat[0, 2])
Current_Date_Time = datetime.datetime.now()  # Extracting Current Time

# The column list and the parameter tuple must stay in the same order:
# one '?' placeholder per column.
insert_sql = (
    "INSERT INTO _nrr_cohorts("
    "beta_cohort_1,beta_cohort_2,beta_cohort_3,"
    "p_value_cohort_1,p_value_cohort_2,p_value_cohort_3,"
    "updated_datetime,report_month)"
    "values(?,?,?,?,?,?,?,?)"
)
# NOTE(review): Report_Month is not defined anywhere in the visible code; it
# must be assigned before this point.
insert_params = (
    const_Coef,
    Cohort_2_Coef,
    Cohort_3_Coef,
    const_Pvalue,
    Cohort_2_Pvalue,
    Cohort_3_Pvalue,
    Current_Date_Time,
    Report_Month,
)

db = conn.cursor()
db.execute(insert_sql, insert_params)
# Commit on the connection (standard DB-API) rather than chaining .commit()
# onto the return value of execute().
conn.commit()
先从 DataFrame 的每个单元格中提取出单个值,再在 SQL 语句中使用 ? 占位符传递这些变量,这样就解决了问题。