我是R的初学者,正在研究泰坦尼克数据集。 我想拆分这样的全名
进入:
但是tidyr中的单独函数也在分割期间添加了一个whitspace。我该如何避免这种情况? 提前谢谢。
这是我的代码
require('ggplot2') # visualization
require('ggthemes') # visualization
require('scales') # visualization
require('dplyr') # data manipulation
require('mice') # imputation
require('randomForest') # classification algorithm
require('tidyr')
setwd('~/Downloads/Titanic dataset/')
train <- read.csv('./train.csv')
test <- read.csv('./test.csv')
full <- bind_rows(train,test)
names<-full["Name"]
names$Name<-gsub('\\"','',names$Name)
names$Name<-gsub('\\(.*\\)','',names$Name)
names<-separate(names,Name,into =c("lastname","firstname"),sep="[\\,]")
names<-separate(names,firstname,into =c("title","firstname"),sep="[\\.]")
full<-bind_cols(names,full)
#full$title<-gsub(" ",'',full$title)
full$title<-trimws(x,'b')
rare_title<- c('Capt','Don','Dona','Jonkheer','Lady','Sir',
'the Countess','Major','col','Major','Rev')
full$title[full$title =="Mlle"] <- "Miss"
full$title[full$title =='Ms'] <- 'Miss'
full$title[full$title =='Mme'] <- 'Mrs'
full$title[full$title %in% rare_title] <- "rare_title"
table(full$Sex, full$title)