我正在使用Python 2.7并且我安装了sklearn。
以下是 data_io 模块的代码:
import csv
import json
import os
import pickle
import psycopg2
def paper_ids_to_string(ids):
    """Return the items of *ids* as one space-separated string.

    Each item is converted with ``str`` before joining, so the items do not
    have to be strings already. An empty iterable yields an empty string.
    """
    return " ".join(map(str, ids))
# Connection string cached at module level after the first call to
# get_db_conn(), so the settings file is read (and the password asked for)
# at most once per process.
conn_string = None


def get_db_conn():
    """Open and return a new psycopg2 connection.

    On the first call the connection string is taken from the
    "postgres_conn_string" entry of get_paths(). If it contains the
    "##AskForPassword##" placeholder, the user is prompted on the console
    and the placeholder is replaced with the typed password. The resulting
    string is kept in the module-level ``conn_string`` and reused by later
    calls; every call still opens a fresh connection.
    """
    global conn_string
    if conn_string is None:
        conn_string = get_paths()["postgres_conn_string"]
        placeholder = "##AskForPassword##"
        if placeholder in conn_string:
            # raw_input: this file targets Python 2.7.
            conn_string = conn_string.replace(
                placeholder, raw_input("PostgreSQL Password: "))
    return psycopg2.connect(conn_string)
def get_paths():
    """Load SETTINGS.json and expand environment variables in its values.

    The file is looked up relative to the current working directory and is
    expected to hold a JSON object whose values are strings.

    Returns:
        dict mapping each key of the JSON object to its value after
        ``os.path.expandvars`` has been applied.

    Raises:
        IOError/OSError: if SETTINGS.json cannot be opened.
        ValueError: if the file does not contain valid JSON.
    """
    # Use a context manager so the file handle is closed deterministically
    # instead of being left for the garbage collector.
    with open("SETTINGS.json") as settings_file:
        raw_paths = json.load(settings_file)
    return {key: os.path.expandvars(value)
            for key, value in raw_paths.items()}
def save_model(model):
    """Pickle *model* to the file named by the "model_path" setting.

    The file is opened in binary mode: pickle data is a byte stream, and
    text mode can corrupt it through newline translation (and is rejected
    outright by ``pickle.dump`` on Python 3). The ``with`` block makes sure
    the data is flushed and the handle closed before returning.
    """
    out_path = get_paths()["model_path"]
    with open(out_path, "wb") as model_file:
        pickle.dump(model, model_file)


def load_model():
    """Unpickle and return the model stored at the "model_path" setting.

    The file is read in binary mode to match save_model().

    NOTE(review): unpickling executes code embedded in the file, so only
    load model files that come from a trusted source.
    """
    in_path = get_paths()["model_path"]
    with open(in_path, "rb") as model_file:
        return pickle.load(model_file)
def write_submission(predictions):
    """Write *predictions* as a CSV file at the "submission_path" setting.

    Args:
        predictions: mapping of author id to an iterable of paper ids.

    The file gets an ("AuthorId", "PaperIds") header row followed by one
    row per author, with that author's paper ids joined into a single
    space-separated field by paper_ids_to_string().
    """
    submission_path = get_paths()["submission_path"]
    # Build every row before touching the file so a bad entry cannot leave
    # a truncated, half-written submission behind.
    rows = [(author_id, paper_ids_to_string(predictions[author_id]))
            for author_id in predictions]
    # The context manager guarantees the rows are flushed and the handle is
    # closed; the original left that to the garbage collector.
    with open(submission_path, "w") as submission_file:
        writer = csv.writer(submission_file, lineterminator="\n")
        writer.writerow(("AuthorId", "PaperIds"))
        writer.writerows(rows)
def get_features_db(table_name):
    """Run the feature query against *table_name* and return all rows.

    Args:
        table_name: name substituted into the query template by
            get_features_query().

    Returns:
        The result of ``cursor.fetchall()`` for the query.
    """
    # Build the query first so a missing template file fails before a
    # database connection is opened.
    query = get_features_query(table_name)
    conn = get_db_conn()
    try:
        cursor = conn.cursor()
        try:
            cursor.execute(query)
            return cursor.fetchall()
        finally:
            cursor.close()
    finally:
        # get_db_conn() opens a new connection on every call; close it here
        # so repeated calls do not leak connections.
        conn.close()
def get_features_query(table_name):
    """Return the SQL text of feature_query.sql for *table_name*.

    The template is read from "feature_query.sql" in the current working
    directory, stripped of surrounding whitespace, and every occurrence of
    the "##DataTable##" placeholder is replaced with *table_name*.

    NOTE(review): *table_name* is spliced into the SQL text as-is, so it
    must only ever come from trusted, hard-coded values.
    """
    # Context manager closes the file handle instead of leaking it.
    with open("feature_query.sql") as query_file:
        query = query_file.read().strip()
    return query.replace("##DataTable##", table_name)
这是训练(train)代码:
import data_io
from sklearn.linear_model import SGDClassifier
def main():
    """Train an SGD classifier on the stored paper features and save it.

    Rows from the "TrainDeleted" table are labelled 0 and rows from the
    "TrainConfirmed" table are labelled 1. The first two columns of every
    row are dropped before training (presumably identifier columns --
    confirm against feature_query.sql). The fitted classifier is persisted
    through data_io.save_model().
    """
    print("Getting features for deleted papers from the database")
    deleted_rows = data_io.get_features_db("TrainDeleted")

    print("Getting features for confirmed papers from the database")
    confirmed_rows = data_io.get_features_db("TrainConfirmed")

    # Deleted rows come first, so the labels below line up with the features.
    features = [row[2:] for row in deleted_rows + confirmed_rows]
    target = [0] * len(deleted_rows) + [1] * len(confirmed_rows)

    print("Training the Classifier")
    classifier = SGDClassifier(
        alpha=0.0001,
        class_weight='auto',
        epsilon=0.1,
        eta0=0.0,
        fit_intercept=True,
        l1_ratio=0.15,
        learning_rate='optimal',
        loss='log',
        n_iter=5,
        n_jobs=1,
        penalty='l2',
        power_t=0.5,
        random_state=None,
        rho=None,
        shuffle=False,
        verbose=0,
        warm_start=False,
    )
    classifier.fit(features, target)

    print("Saving the classifier")
    data_io.save_model(classifier)


if __name__ == "__main__":
    main()
这是预测(predict)代码:
from collections import defaultdict
import data_io
def main():
    """Rank each author's papers by predicted probability and write them out.

    Rows are read from the "ValidPaper" table; the first two columns of a
    row are the author id and paper id, the remaining columns are the
    features fed to the classifier loaded with data_io.load_model(). For
    every author the paper ids are ordered from highest to lowest predicted
    probability and handed to data_io.write_submission().
    """
    print("Getting features for valid papers from the database")
    data = data_io.get_features_db("ValidPaper")
    author_paper_ids = [x[:2] for x in data]
    features = [x[2:] for x in data]

    print("Loading the classifier")
    classifier = data_io.load_model()

    print("Making predictions")
    # predict_proba is a method: it must be *called* with the feature matrix
    # before its result can be indexed. Subscripting the method object itself
    # raises "TypeError: 'instancemethod' object is not subscriptable".
    # Column 1 of the returned array is the probability of the second class.
    predictions = list(classifier.predict_proba(features)[:, 1])

    # Group (probability, paper id) pairs per author.
    author_predictions = defaultdict(list)
    for (a_id, p_id), pred in zip(author_paper_ids, predictions):
        author_predictions[a_id].append((pred, p_id))

    # Sorting the pairs in reverse puts the most probable paper first.
    paper_predictions = {}
    for author_id in sorted(author_predictions):
        paper_ids_sorted = sorted(author_predictions[author_id], reverse=True)
        paper_predictions[author_id] = [x[1] for x in paper_ids_sorted]

    print("Writing predictions to file")
    data_io.write_submission(paper_predictions)


if __name__ == "__main__":
    main()
当我尝试运行预测代码时,出现了以下类型错误:语句 predictions = classifier.predict_proba[:,1] 抛出 TypeError:'instancemethod' 对象不可下标访问(not subscriptable)。
我该怎么办?
答案 0 :(得分:0)
predict_proba 是一个方法,不能直接对它做下标操作。您需要先用特征矩阵(或特征向量)调用它,再对返回的数组取下标,例如:predictions = classifier.predict_proba(features)[:, 1]。