我试图遍历一个文件,并替换其中几种内容的所有出现位置。它有时能正常工作,但会漏掉一些在文件前面曾被正确替换过的值。我不明白为什么它不是每次都有效。
# Train an H2O AutoML classifier on the HR employee-attrition workbook and
# explain its predictions for a handful of held-out rows with LIME.

# Packages ----
library(h2o)       # Professional grade ML pkg
library(tidyquant) # Loads tidyverse and several other pkgs
library(readxl)    # Super simple excel reader
library(lime)      # Explain complex black-box ML models
library(recipes)   # Preprocessing for machine learning

# Load and organize the data ----
hr_data_raw_tbl <- read_excel(
  path = "data/WA_Fn-UseC_-HR-Employee-Attrition.xlsx"
)

# Convert character columns to factors and move the target to column 1;
# the `[, -1]` subsetting further down relies on that position.
hr_data_organized_tbl <- hr_data_raw_tbl %>%
  mutate_if(is.character, as.factor) %>%
  select(Attrition, everything())

# Preprocessing recipe ----
# Drop the EmployeeNumber column, remove zero-variance predictors, then
# center and scale every numeric column.
recipe_obj <- hr_data_organized_tbl %>%
  recipe(formula = Attrition ~ .) %>%
  step_rm(EmployeeNumber) %>%
  step_zv(all_predictors()) %>%
  step_center(all_numeric()) %>%
  step_scale(all_numeric()) %>%
  # `training` is the data argument of prep(); `data` is not one of its
  # arguments, so the value passed that way was never used.
  prep(training = hr_data_organized_tbl)

# bake() names its data argument `new_data` (`newdata` is the old spelling).
hr_data_bake_tbl <- bake(recipe_obj, new_data = hr_data_organized_tbl)

# Hand the data to H2O and split it ----
h2o.init()
hr_data_bake_h2o <- as.h2o(hr_data_bake_tbl)

hr_data_split <- h2o.splitFrame(
  hr_data_bake_h2o,
  ratios = c(0.7, 0.15),
  seed = 1234
)
train_h2o <- h2o.assign(hr_data_split[[1]], "train") # 70%
valid_h2o <- h2o.assign(hr_data_split[[2]], "valid") # 15%
test_h2o  <- h2o.assign(hr_data_split[[3]], "test")  # 15%

# AutoML ----
y <- "Attrition"
x <- setdiff(names(train_h2o), y)

automl_models_h2o <- h2o.automl(
  x = x,
  y = y,
  training_frame = train_h2o,
  validation_frame = valid_h2o,
  leaderboard_frame = test_h2o,
  max_runtime_secs = 15
)
automl_leader <- automl_models_h2o@leader

# LIME ----
# Column 1 (the Attrition target) is dropped so that only predictors are
# handed to the explainer.
explainer <- lime::lime(
  as.data.frame(train_h2o[, -1]),
  model = automl_leader,
  bin_continuous = FALSE
)

# Explain the first ten test rows.
explanation <- lime::explain(
  x = as.data.frame(test_h2o[1:10, -1]),
  explainer = explainer,
  n_labels = 1,
  n_features = 4,
  n_permutations = 500,
  kernel_width = 1
)
explanation
答案 0 :(得分:1)
replace 方法不会修改原字符串,而是返回一个新的字符串。试试这个:
# Copy "myfile" to "mynewfile", applying several character replacements
# to every line.
with open("myfile", "rt") as filein, open("mynewfile", "wt") as fileout:
    for line in filein:
        # str.replace returns a new string, so rebind the result each time.
        line = line.replace('a', 'b')
        line = line.replace('c', 'd')
        # ... add further replacements here ...
        # Write once, after all replacements, so each input line yields
        # exactly one output line.
        fileout.write(line)
或者更好的写法:
# Each tuple is (old, new); append further pairs as needed.
list_replace = [('a', 'b'), ('c', 'd')]

# Copy "myfile" to "mynewfile", applying every pair in list_replace to
# each line.
with open("myfile", "rt") as filein, open("mynewfile", "wt") as fileout:
    for line in filein:
        # Accumulate all replacements on the same string before writing it.
        for old, new in list_replace:
            line = line.replace(old, new)
        fileout.write(line)
根据评论进行更新
# Each tuple is (old, new); append further pairs as needed.
list_replace = [('a', 'b'), ('c', 'd')]

# Copy "myfile" to "mynewfile", applying the replacements only to the part
# of each line that follows its first 36 characters.
with open("myfile", "rt") as filein, open("mynewfile", "wt") as fileout:
    for line in filein:
        # The first 36 characters are passed through unchanged.
        head, tail = line[:36], line[36:]
        for old, new in list_replace:
            tail = tail.replace(old, new)
        fileout.write(head + tail)
答案 1 :(得分:0)
我无法在 Python 3 中复现您的问题。以下代码可以正常运行:
# Copy "myfile" to "mynewfile", applying all six character replacements to
# every line.
with open("myfile", "rt") as filein, open("mynewfile", "wt") as fileout:
    for line in filein:
        # Chain the replacements on one string and write it once; writing
        # line.replace(...) separately for each pair would emit six copies
        # of the line, each with only a single replacement applied.
        for old, new in (('a', 'b'), ('c', 'd'), ('e', 'f'),
                         ('g', 'h'), ('i', 'j'), ('k', 'l')):
            line = line.replace(old, new)
        fileout.write(line)
print("Done")
您必须考虑每一行的结尾处是否存在 \n,因此请检查一下。
答案 2 :(得分:0)
对于从 filein 读入的每一行,您都向输出文件写了 6 行。每个新行只替换了 1 种字符,而不是全部字符。
如果您想替换一行中的所有目标内容,请尝试以下做法。multi_replace 接收一个搜索字符串列表和一个替换字符串列表,然后遍历它们,在每一行上逐一进行替换。
def multi_replace(line, searchlist, replacelist):
    """Return ``line`` after applying a series of substring replacements.

    The search and replacement strings are paired up by position and the
    pairs are applied one after another, so a later pair also acts on text
    produced by an earlier one. Pairing uses ``zip``, so surplus entries in
    the longer of the two lists are ignored.

    Args:
        line: The string to transform.
        searchlist: Substrings to look for.
        replacelist: Replacement for the substring at the same position
            in ``searchlist``.

    Returns:
        The transformed string.
    """
    result = line
    for old, new in zip(searchlist, replacelist):
        result = result.replace(old, new)
    return result
# Copy "myfile" to "mynewfile", passing every line through multi_replace
# with the six search/replacement characters below.
search_chars = ['a', 'c', 'e', 'g', 'i', 'k']
replace_chars = ['b', 'd', 'f', 'h', 'j', 'l']
with open("myfile", "rt") as filein, open("mynewfile", "wt") as fileout:
    for line in filein:
        converted = multi_replace(line, search_chars, replace_chars)
        fileout.write(converted)
print("Done")