Question

这段代码用于回归时可以正常工作，但用于分类时不起作用

import pandas as pd
import xgboost as xgb
import numpy as np
import itertools

# Folder that holds the input workbook (Windows path, trailing separator included).
salesPath = "E:\\python\\Salesprog\\"

# Read the workbook into a DataFrame and show its summary statistics
# (the bare expression only displays something in an interactive session).
test_path = salesPath + 'test.xlsx'
test = pd.read_excel(test_path)
test.describe()

def latinizator(letter, dic):
    """Return ``letter`` with every key of ``dic`` replaced by its value.

    The replacements are applied one after another, in the iteration order
    of ``dic``, so the output of an earlier replacement can be rewritten by
    a later one.
    """
    converted = letter
    for source in dic:
        converted = converted.replace(source, dic[source])
    return converted
>

这是拉丁化（西里尔字母转写为拉丁字母）的部分，它运行良好

>
# Cyrillic -> Latin transliteration table: one entry per letter,
# lowercase letters first, then the uppercase ones.
legend = {
    # lowercase
    "а": "a", "б": "b", "в": "v", "г": "g", "д": "d", "е": "e",
    "ё": "yo", "ж": "zh", "з": "z", "и": "i", "й": "y", "к": "k",
    "л": "l", "м": "m", "н": "n", "о": "o", "п": "p", "р": "r",
    "с": "s", "т": "t", "у": "u", "ф": "f", "х": "h", "ц": "ts",
    "ч": "ch", "ш": "sh", "щ": "shch", "ъ": "y", "ы": "y", "ь": "'",
    "э": "e", "ю": "yu", "я": "ya",

    # uppercase
    "А": "A", "Б": "B", "В": "V", "Г": "G", "Д": "D", "Е": "E",
    "Ё": "Yo", "Ж": "Zh", "З": "Z", "И": "I", "Й": "Y", "К": "K",
    "Л": "L", "М": "M", "Н": "N", "О": "O", "П": "P", "Р": "R",
    "С": "S", "Т": "T", "У": "U", "Ф": "F", "Х": "H", "Ц": "Ts",
    "Ч": "Ch", "Ш": "Sh", "Щ": "Shch", "Ъ": "Y", "Ы": "Y", "Ь": "'",
    "Э": "E", "Ю": "Yu", "Я": "Ya",
}
# Transliterate every value of the text column and keep the result as a
# one-column DataFrame.
# - `columns` is a list: recent pandas versions reject a set here because a
#   set has no defined order.
# - The source index is reused so that later index-based joins line up with
#   the rest of the data even if `test` does not carry a default RangeIndex.
# NOTE(review): latinizator() calls str.replace, so a missing (NaN) cell in
# column_10 would raise here -- confirm the column has no blanks.
phrases = pd.DataFrame(
    [latinizator(line, legend) for line in test['column_10']],
    index=test.index,
    columns=['column_10']
)

这是 XGBoost 回归器（XGBRegressor）的代码；但换成分类器时，它就无法工作

>
# Split the sheet into targets and numeric inputs. 'column_10' is the text
# column, so it is excluded from the numeric feature frame.
test_y = test[['y_1', 'y_2', 'y_3', 'y_4']]
test_x = test.drop(['column_10', 'y_1', 'y_2', 'y_3', 'y_4'], axis=1)

# Element-wise transforms of every input column. add_suffix() renames all
# columns in one call and keeps the derived names distinct from the originals.
test_x_exp2 = (test_x ** 2).add_suffix('exp2')
test_x_exp3 = (test_x ** 3).add_suffix('exp3')
test_x_exp4 = (test_x ** 4).add_suffix('exp4')
# 0.5 instead of 1/2: under Python 2 integer division 1/2 evaluates to 0.
test_x_exp12 = (test_x ** 0.5).add_suffix('exp12')
# NOTE(review): log2 gives -inf/NaN for non-positive inputs -- confirm the
# inputs are strictly positive.
test_x_log = np.log2(test_x).add_suffix('log')
test_x_sin = np.sin(test_x).add_suffix('sin')
test_x_cos = np.cos(test_x).add_suffix('cos')

# Every combination of 2..8 of the listed columns.
# NOTE(review): 'column_3' is not in this list -- confirm that is intentional.
combo_columns = ['column_1', 'column_2', 'column_4', 'column_5',
                 'column_6', 'column_7', 'column_8', 'column_9']
b = [combo
     for size in range(2, 9)
     for combo in itertools.combinations(combo_columns, size)]

# For each combination build the row-wise sum ("plus") and the row-wise
# product ("multi") of its columns. Both accumulators start from the first
# column: the product must not start from 0, otherwise every "multi"
# feature is identically zero.
plus_parts = []
multi_parts = []
for combo in b:
    nazv = ''.join(combo)
    total = test_x[combo[0]]
    product = test_x[combo[0]]
    for col in combo[1:]:
        total = total + test_x[col]
        product = product * test_x[col]
    plus_parts.append(total.rename(nazv + 'plus'))
    multi_parts.append(product.rename(nazv + 'multi'))

# Assemble everything in one concat (same column order as joining the pieces
# one by one: inputs, sums, products, then the element-wise transforms).
summ = pd.concat(
    [test_x] + plus_parts + multi_parts
    + [test_x_exp2, test_x_exp3, test_x_exp4, test_x_exp12,
       test_x_log, test_x_cos, test_x_sin],
    axis=1
)

cat_feat：这是字符串（类别）特征列

# List of categorical column names; it is not referenced again in the
# visible part of the script.
cat_feat = ['column_10']
# One indicator column per distinct value of the transliterated text column.
one_hot = pd.get_dummies(phrases['column_10'])
# Engineered features + indicator columns + the two targets in one frame.
rdf = summ.join(one_hot)
rdf = rdf.join(test_y[['y_2']])
rdf = rdf.join(test_y[['y_1']])

pd.set_option("display.max_columns",100)
# Compute the correlation matrix once and reuse it for the mask; calling
# rdf.corr() twice repeats the full pairwise computation on a wide frame.
corr = rdf.corr()
corr[corr > 0.1]

from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score
from sklearn.model_selection import GridSearchCV

# Inputs are all columns of rdf except the two targets; the value to
# predict is y_2.
trn = rdf.drop(['y_1','y_2'], axis=1)
trg = test_y[['y_2']]

# 70/30 train/test split with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    trn, trg, test_size=0.3, random_state=42
)

# Base estimator shared by the grid searches.
xgb_model = xgb.XGBRegressor()

# Number of cross-validation folds used by every search.
cv = 10

#First step
# Search only n_estimators (40, 60, ..., 580).
alpha = list(range(40, 600, 20))
xgb_params = [{"n_estimators": alpha}]
#nacenka.to_excel(salesPath + 'nacenka111.xlsx')
xgb_grid = GridSearchCV(
    xgb_model, xgb_params, scoring='r2', cv=cv, n_jobs=-1, verbose=2
)
xgb_grid.fit(X_train, y_train)

其他结果

#First result check
# Refit a regressor with the n_estimators chosen by the grid search and
# score it on the held-out rows.
xgb_best = xgb.XGBRegressor(n_estimators=xgb_grid.best_params_['n_estimators'])
xgb_best.fit(X_train, y_train)
best_predictions = xgb_best.predict(X_test)
r2_score(y_test, best_predictions)
best_predictions1 = pd.DataFrame(best_predictions)

# Same check through the grid-search object itself.
r2_score(y_test, xgb_grid.predict(X_test))

import matplotlib.pyplot as plot
# get_booster() is the accessor for the fitted Booster in the scikit-learn
# wrapper; the old booster() call no longer works in current XGBoost.
# The scores are fetched once and reused for printing and plotting.
pred = xgb_best.get_booster().get_score(importance_type='weight')
print(pred)
df = pd.DataFrame([pred])
df.plot(kind='bar')

#Second step
# Coarse grid over max_depth (3, 5, 7, 9) and min_child_weight (1..5), with
# n_estimators pinned to the value found by the previous search.
alpha1 = list(range(3, 10, 2))
alpha2 = list(range(1, 6, 1))
best = xgb_grid.best_params_
xgb_params = [{
    "learning_rate": [0.1],
    "n_estimators": [best['n_estimators']],
    "max_depth": alpha1,
    "min_child_weight": alpha2
}]
xgb_grid = GridSearchCV(
    xgb_model, xgb_params, scoring='r2', cv=cv, n_jobs=-1, verbose=3
)
xgb_grid.fit(X_train, y_train)

print(xgb_grid.best_params_)

#Third step
# step 2b - tuning max_depth and min_child_weight
# Fine grid: the best value of each parameter and its two neighbours.
best = xgb_grid.best_params_
depth = best['max_depth']
child_weight = best['min_child_weight']
xgb_params = [{
    "learning_rate": [0.1],
    "n_estimators": [best['n_estimators']],
    "max_depth": [depth - 1, depth, depth + 1],
    "min_child_weight": [child_weight - 1, child_weight, child_weight + 1]
}]

xgb_grid = GridSearchCV(
    xgb_model, xgb_params, scoring='r2', cv=cv, n_jobs=4, verbose=3
)
xgb_grid.fit(X_train, y_train)

print(xgb_grid.best_params_)

#Fourth step tuning gamma
# Everything tuned so far is pinned; only gamma (0.0 .. 0.4) varies.
best = xgb_grid.best_params_
xgb_params = [{
    "learning_rate": [0.1],
    "n_estimators": [best['n_estimators']],
    "max_depth": [best['max_depth']],
    "min_child_weight": [best['min_child_weight']],
    "gamma": [step / 10.0 for step in range(0, 5)]
}]

xgb_grid = GridSearchCV(
    xgb_model, xgb_params, scoring='r2', cv=cv, n_jobs=4, verbose=3
)
xgb_grid.fit(X_train, y_train)

print(xgb_grid.best_params_)

## step 4 - tuning subsample, colsample_bytree
# Both sampling ratios are searched over 0.6 .. 0.9.
best = xgb_grid.best_params_
xgb_params = [{
    "learning_rate": [0.1],
    "n_estimators": [best['n_estimators']],
    "max_depth": [best['max_depth']],
    "min_child_weight": [best['min_child_weight']],
    "gamma": [best['gamma']],
    "subsample": [step / 10.0 for step in range(6, 10)],
    "colsample_bytree": [step / 10.0 for step in range(6, 10)]
}]

xgb_grid = GridSearchCV(
    xgb_model, xgb_params, scoring='r2', cv=cv, n_jobs=4, verbose=3
)
xgb_grid.fit(X_train, y_train)

print(xgb_grid.best_params_)

# step 5a - tuning regularization
# All previously tuned parameters are pinned; only reg_alpha varies.
# The candidate list holds each value once: 1e-5 used to appear twice
# (as 1e-5 and 1e-05), which only added redundant cross-validation fits.
xgb_params  = [
    {
    "learning_rate": [0.1],
    "n_estimators": [xgb_grid.best_params_['n_estimators']],
    "max_depth": [xgb_grid.best_params_['max_depth']],
    "min_child_weight": [xgb_grid.best_params_['min_child_weight']],
    "gamma": [xgb_grid.best_params_['gamma']],
    "subsample": [xgb_grid.best_params_['subsample']],
    "colsample_bytree": [xgb_grid.best_params_['colsample_bytree']],
    'reg_alpha': [1e-5, 0, 0.001, 0.005, 0.01, 0.05, 0.1, 1, 2, 5, 10, 100]
    }
]

xgb_grid = GridSearchCV(xgb_model, xgb_params, scoring='r2', cv=cv, n_jobs=4, verbose=3)
xgb_grid.fit(X_train, y_train)
print(xgb_grid.best_params_)

# Build the final model from the parameters found by the grid searches.
# max_depth and min_child_weight are read from best_params_ like every other
# value; they were previously hard-coded to 6 and 8, which ignored the search
# result (8 is not even inside the searched min_child_weight range).
best = xgb_grid.best_params_
xgb_best = xgb.XGBRegressor(n_estimators=best['n_estimators'],
                              learning_rate=best['learning_rate'],
                              max_depth=best['max_depth'],
                              min_child_weight=best['min_child_weight'],
                              gamma=best['gamma'],
                              subsample=best['subsample'],
                              colsample_bytree=best['colsample_bytree'],
                              reg_alpha=best['reg_alpha'])
xgb_best.fit(X_train, y_train)

# R^2 of the final model on the held-out rows.
r2_score(y_test, xgb_best.predict(X_test))

使用回归器（regressor）时可以运行，但换成分类器时就会出现问题

Answer 1

任何搜索引擎都会指向您： https://pypi.python.org/pypi/pip

在那里，您可以下载 wheel 文件，然后在本地使用 pip 工具进行安装。如果您的公司阻止访问该站点，请在其他地方下载所需的软件包。请仔细检查这些软件包的依赖关系，并把所依赖的其他软件包也一并下载。

对于Windows，另一种选择是Christoph Gohlke的Unofficial Windows Binaries。

P.S.：sys 和 warnings 始终可用。

Answer 2

像 easy_install 和 pip 这样的 Python 包管理工具，只是把包含可执行 Python 代码的包文件夹下载到模块搜索路径中的某个本地文件夹，并负责管理依赖项。

你可以获取所需模块的源文件，把它们放入你的工作目录，这样就可以直接使用了。

例如，你可以

git clone https://github.com/numpy/numpy.git numpy

这会把 numpy 作为一个文件夹下载到您的项目中（或者使用另一台可以联网的机器下载，然后用 U 盘把该文件夹复制过来）

此外，这份关于 Python 库搜索路径的手册（manual）可能会帮助您理解其底层的工作原理

采用这种方式时，你应该留意依赖关系的问题。

Answer 3

https://pypi.python.org/pypi 提供所有的包。单击以下链接下载相应的包：

matplotlib - https://pypi.python.org/pypi/matplotlib

Answer 4

我肯定会看看Anaconda。 https://www.continuum.io/downloads

包含的软件包列表： https://docs.continuum.io/anaconda/packages/pkg-docs

据我所知，唯一不包括的包是 "warnings"。

我可以在哪里下载python包到windows（离线安装）

4 个答案: