from __future__ import print_function
import statsmodels.api as sm
import statsmodels.formula.api as smf
import pandas as pd
import pandas
# Fit a GLM of y on x from the CSV and print the fitted model's summary.
# The CSV is expected to provide columns named "x" and "y".
df = pandas.read_csv("volvo_volvo.csv")
# Unused by the statements below; kept in case later code relies on it.
dfw = pandas.DataFrame(data=df, columns=['x', 'y'])
y = df.y.values
x = df.x.values
data = pd.DataFrame({'x': x, 'y': y})  # put the two columns side by side
print(data)
formula = 'y ~ x'
# The Binomial family requires a response in [0, 1] (0/1 outcomes or
# proportions). With a large continuous response the initial deviance is NaN
# and statsmodels raises "The first guess on the deviance function returned a
# nan". A Gaussian family (identity link) is the appropriate choice for a
# continuous response.
mod1 = smf.glm(formula=formula, data=data, family=sm.families.Gaussian()).fit()
# Use a dedicated name so the regressor array `x` is not overwritten.
summary = mod1.summary()
print(summary)
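错误:`ValueError: The first guess on the deviance function returned a nan.  This could be a boundary problem and should be reported.`(即:对 deviance 函数的第一次猜测返回了 nan。这可能是一个边界问题,应该报告。)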
从 CSV 文件的列中读入的数据如下:
x y
0 50000 16000
1 43000 15000
2 32424 18299
3 92821 14000
4 50392 19000
5 23941 19521
6 29201 20192
7 50192 15291
8 30201 19392
9 20591 18282
10 50401 19294
11 69401 16263
12 22124 16392
13 14940 19583