如何将字符列表和矩阵这两者粘贴在一起?
library(XML)
library(stringr)
library(rlist )
# Page that holds the table to be read.
theURL="http://www.loc.gov/rr/print/list/057_chron.html"
# Read one table from the page (which=4 selects the table; header=TRUE uses
# its header row for column names) and keep text columns as character.
presidents=readHTMLTable(theURL,which=4,header=TRUE,stringsAsFactors=FALSE)
# Names as found in the PRESIDENT column of that table.
name=presidents$PRESIDENT
# Last space-delimited token of each name, including its leading space.
# str_match() returns a character matrix (one row per name), not a vector.
lastname=str_match(name," [^ ]+$")
# Split every name on its own last-name match; the given-name part comes first.
t1=str_split(name,lastname)
t1=data.frame(t1,stringsAsFactors=FALSE)
# Split every piece on single spaces to separate the individual given names.
X=sapply(t1,str_split," ")
# Keep the odd-numbered elements (1, 3, ..., 129), i.e. the given-name pieces.
# NOTE(review): 65 is hard-coded -- presumably the number of rows in the
# table; confirm against the data.
X=X[(1:65)*2-1]
# Reduce every given name to its first character ...
X=sapply(X,str_sub,1,1)
# ... and append a period to form an initial.
X=sapply(X,paste0,".")
# X is still a list here: entries with more than one given name are character
# vectors of length > 1. paste0() coerces each list element to a single
# string, which is why the printed output below shows values such as
# "c(\"J.\", \"Q.\") Adams" instead of "J. Q. Adams".
Z=paste0(X,lastname)
Z
这是我得到的输出:
[1] "G. Washington" "J. Adams" "T. Jefferson"
[4] "T. Jefferson" "J. Madison" "J. Madison"
[7] "J. Madison" "J. Madison" "J. Monroe"
[10] "c(\"J.\", \"Q.\") Adams" "A. Jackson" "A. Jackson"
[13] "c(\"M.\", \"V.\") Buren" "c(\"W.\", \"H.\") Harrison" "J. Tyler"
[16] "c(\"J.\", \"K.\") Polk" "Z. Taylor" "M. Fillmore"
[19] "F. Pierce" "F. Pierce" "J. Buchanan"
[22] "A. Lincoln" "A. Lincoln" "A. Johnson"
[25] "c(\"U.\", \"S.\") Grant" "c(\"U.\", \"S.\") Grant" "c(\"U.\", \"S.\") Grant"
[28] "c(\"R.\", \"B.\") Hayes" "c(\"J.\", \"A.\") Garfield" "c(\"C.\", \"A.\") Arthur"
[31] "G. Cleveland" "G. Cleveland" "B. Harrison"
[34] "G. Cleveland" "W. McKinley" "W. McKinley"
[37] "W. McKinley" "T. Roosevelt" "T. Roosevelt"
[40] "c(\"W.\", \"H.\") Taft" "c(\"W.\", \"H.\") Taft" "W. Wilson"
[43] "c(\"W.\", \"G.\") Harding" "C. Coolidge" "C. Coolidge"
[46] "H. Hoover" "c(\"F.\", \"D.\") Roosevelt" "c(\"F.\", \"D.\") Roosevelt"
[49] "c(\"F.\", \"D.\") Roosevelt" "c(\"H.\", \"S.\") Truman" "c(\"H.\", \"S.\") Truman"
[52] "c(\"D.\", \"D.\") Eisenhower" "c(\"J.\", \"F.\") Kennedy" "c(\"L.\", \"B.\") Johnson"
[55] "c(\"L.\", \"B.\") Johnson" "c(\"R.\", \"M.\") Nixon" "c(\"R.\", \"M.\") Nixon"
[58] "c(\"G.\", \"R.\") Ford" "J. Carter" "R. Reagan"
[61] "G. Bush" "B. Clinton" "c(\"G.\", \"W.\") Bush"
[64] "B. Obama" "c(\"D.\", \"J.\") Trump"
我希望得到的输出是:
[1] "G. Washington" "J. Adams" "T. Jefferson" "T. Jefferson" "J. Madison"
[6] "J. Madison" "J. Madison" "J. Madison" "J. Monroe" "J. Q. Adams"
[11] "A. Jackson" "A. Jackson" "M. V. Buren" "W. H. Harrison" "J. Tyler"
[16] "J. K. Polk" "Z. Taylor" "M. Fillmore" "F. Pierce" "F. Pierce"
[21] "J. Buchanan" "A. Lincoln" "A. Lincoln" "A. Johnson" "U. S. Grant"
[26] "U. S. Grant" "U. S. Grant" "R. B. Hayes" "J. A. Garfield" "C. A. Arthur"
[31] "G. Cleveland" "G. Cleveland" "B. Harrison" "G. Cleveland" "W. McKinley"
[36] "W. McKinley" "W. McKinley" "T. Roosevelt" "T. Roosevelt" "W. H. Taft"
[41] "W. H. Taft" "W. Wilson" "W. G. Harding" "C. Coolidge" "C. Coolidge"
[46] "H. Hoover" "F. D. Roosevelt" "F. D. Roosevelt" "F. D. Roosevelt" "H. S. Truman"
[51] "H. S. Truman" "D. D. Eisenhower" "J. F. Kennedy" "L. B. Johnson" "L. B. Johnson"
[56] "R. M. Nixon" "R. M. Nixon" "G. R. Ford" "J. Carter" "R. Reagan"
[61] "G. Bush" "B. Clinton" "G. W. Bush" "B. Obama" "D. J. Trump"
哪个函数可以帮我修复 Z?如何将字符列表和矩阵这两者粘贴在一起?谢谢!
答案 0 :(得分:2)
下面是一种使用正则表达式的方法:
# Two regex passes over the PRESIDENT column:
#   1. inner pass (PCRE): drop the lower-case letters that follow a
#      word-initial capital, except in the final whitespace-delimited token,
#      which is captured and put back unchanged;
#   2. outer pass: put ". " after every single capital letter that is
#      followed by whitespace.
gsub(
  pattern = "(\\b[A-Z])\\s",
  replacement = "\\1. ",
  x = gsub(
    pattern = "(?<=\\b[A-Z])[a-z]+|(\\s+\\S+)$",
    replacement = "\\1",
    x = presidents$PRESIDENT,
    perl = TRUE
  )
)
#[1] "G. Washington" "J. Adams" "T. Jefferson" "T. Jefferson" "J. Madison" "J. Madison"
#[7] "J. Madison" "J. Madison" "J. Monroe" "J. Q. Adams" "A. Jackson" "A. Jackson"
#[13] "M. V. Buren" "W. H. Harrison" "J. Tyler" "J. K. Polk" "Z. Taylor" "M. Fillmore"
#[19] "F. Pierce" "F. Pierce" "J. Buchanan" "A. Lincoln" "A. Lincoln" "A. Johnson"
#[25] "U. S. Grant" "U. S. Grant" "U. S. Grant" "R. B. Hayes" "J. A. Garfield" "C. A. Arthur"
#[31] "G. Cleveland" "G. Cleveland" "B. Harrison" "G. Cleveland" "W. McKinley" "W. McKinley"
#[37] "W. McKinley" "T. Roosevelt" "T. Roosevelt" "W. H. Taft" "W. H. Taft" "W. Wilson"
#[43] "W. G. Harding" "C. Coolidge" "C. Coolidge" "H. Hoover" "F. D. Roosevelt" "F. D. Roosevelt"
#[49] "F. D. Roosevelt" "H. S. Truman" "H. S. Truman" "D. D. Eisenhower" "J. F. Kennedy" "L. B. Johnson"
#[55] "L. B. Johnson" "R. M. Nixon" "R. M. Nixon" "G. R. Ford" "J. Carter" "R. Reagan"
#[61] "G. Bush" "B. Clinton" "G. W. Bush" "B. Obama" "D. J. Trump"
答案 1 :(得分:1)
看看这是不是你想要的结果。
# Abbreviate every given name in `name` to "<initial>.", leaving the final
# token (the surname) and tokens that already look like initials unchanged.
# vapply() guarantees a character vector, including character(0) when `name`
# is empty (sapply() would return an empty list in that case).
result <- vapply(strsplit(name, " "), function(s) {
  n <- length(s)
  # Positions of tokens that do not already contain "<letter>.".
  i <- which(!grepl("[[:alpha:]]\\.", s))
  # Never abbreviate the last token.
  i <- setdiff(i, n)
  # Keep only the first letter and add a period; a no-op when `i` is empty.
  s[i] <- sub("(^[[:alpha:]]).*", "\\1.", s[i])
  paste(s, collapse = " ")
}, character(1))