粘贴两个字符列表矩阵

时间:2018-12-15 15:56:38

标签: r

我如何将这两个对象(一个字符列表和一个字符矩阵)粘贴在一起?

library(XML)
library(stringr)
library(rlist )
# Goal of this script: turn each full name in the PRESIDENT column into
# "<initial(s)>. <last name>", e.g. "G. Washington".

# Page that holds the table to be scraped.
theURL="http://www.loc.gov/rr/print/list/057_chron.html"
# Read the table selected by which=4 from that page, with header=TRUE and
# strings kept as character rather than factor.
presidents=readHTMLTable(theURL,which=4,header=TRUE,stringsAsFactors=FALSE) 
# Full names, one entry per row of the table.
name=presidents$PRESIDENT

# Last word of each name together with the single space in front of it
# (regex " [^ ]+$"). NOTE(review): str_match returns a matrix, not a plain
# vector -- confirm this is what the later steps expect.
lastname=str_match(name," [^ ]+$")

# Split every name using its own last-name match as the pattern, so the piece
# in front of the surname (the given names) is separated out.
t1=str_split(name,lastname)
t1=data.frame(t1,stringsAsFactors=FALSE)
# Split each piece on spaces to get the individual given names.
X=sapply(t1,str_split," ")
# Keep only the elements at odd positions (1, 3, ..., 129). The count 65 is
# hard-coded and matches the 65 entries in the printed output.
X=X[(1:65)*2-1]
# Reduce every given name to its first character, then append ".".
X=sapply(X,str_sub,1,1)
X=sapply(X,paste0,".")

# Glue the initials onto the last names.
# NOTE(review): for names with two given names the printed result contains
# text such as c("J.", "Q.") -- presumably because X still holds a
# multi-element vector for those entries and paste0 does not collapse it;
# confirm before relying on Z.
Z=paste0(X,lastname)

# Print the result.
Z

这是我的输出

[1] "G. Washington"            "J. Adams"                     "T. Jefferson"                
 [4] "T. Jefferson"                 "J. Madison"                   "J. Madison"                  
 [7] "J. Madison"                   "J. Madison"                   "J. Monroe"                   
[10] "c(\"J.\", \"Q.\") Adams"      "A. Jackson"                   "A. Jackson"                  
[13] "c(\"M.\", \"V.\") Buren"      "c(\"W.\", \"H.\") Harrison"   "J. Tyler"                    
[16] "c(\"J.\", \"K.\") Polk"       "Z. Taylor"                    "M. Fillmore"                 
[19] "F. Pierce"                    "F. Pierce"                    "J. Buchanan"                 
[22] "A. Lincoln"                   "A. Lincoln"                   "A. Johnson"                  
[25] "c(\"U.\", \"S.\") Grant"      "c(\"U.\", \"S.\") Grant"      "c(\"U.\", \"S.\") Grant"     
[28] "c(\"R.\", \"B.\") Hayes"      "c(\"J.\", \"A.\") Garfield"   "c(\"C.\", \"A.\") Arthur"    
[31] "G. Cleveland"                 "G. Cleveland"                 "B. Harrison"                 
[34] "G. Cleveland"                 "W. McKinley"                  "W. McKinley"                 
[37] "W. McKinley"                  "T. Roosevelt"                 "T. Roosevelt"                
[40] "c(\"W.\", \"H.\") Taft"       "c(\"W.\", \"H.\") Taft"       "W. Wilson"                   
[43] "c(\"W.\", \"G.\") Harding"    "C. Coolidge"                  "C. Coolidge"                 
[46] "H. Hoover"                    "c(\"F.\", \"D.\") Roosevelt"  "c(\"F.\", \"D.\") Roosevelt" 
[49] "c(\"F.\", \"D.\") Roosevelt"  "c(\"H.\", \"S.\") Truman"     "c(\"H.\", \"S.\") Truman"    
[52] "c(\"D.\", \"D.\") Eisenhower" "c(\"J.\", \"F.\") Kennedy"    "c(\"L.\", \"B.\") Johnson"   
[55] "c(\"L.\", \"B.\") Johnson"    "c(\"R.\", \"M.\") Nixon"      "c(\"R.\", \"M.\") Nixon"     
[58] "c(\"G.\", \"R.\") Ford"       "J. Carter"                    "R. Reagan"                   
[61] "G. Bush"                      "B. Clinton"                   "c(\"G.\", \"W.\") Bush"      
[64] "B. Obama"                     "c(\"D.\", \"J.\") Trump"     

我希望得到的输出是

 [1] "G. Washington"    "J. Adams"         "T. Jefferson"     "T. Jefferson"     "J. Madison"      
 [6] "J. Madison"       "J. Madison"       "J. Madison"       "J. Monroe"        "J. Q. Adams"     
[11] "A. Jackson"       "A. Jackson"       "M. V. Buren"      "W. H. Harrison"   "J. Tyler"        
[16] "J. K. Polk"       "Z. Taylor"        "M. Fillmore"      "F. Pierce"        "F. Pierce"       
[21] "J. Buchanan"      "A. Lincoln"       "A. Lincoln"       "A. Johnson"       "U. S. Grant"     
[26] "U. S. Grant"      "U. S. Grant"      "R. B. Hayes"      "J. A. Garfield"   "C. A. Arthur"    
[31] "G. Cleveland"     "G. Cleveland"     "B. Harrison"      "G. Cleveland"     "W. McKinley"     
[36] "W. McKinley"      "W. McKinley"      "T. Roosevelt"     "T. Roosevelt"     "W. H. Taft"      
[41] "W. H. Taft"       "W. Wilson"        "W. G. Harding"    "C. Coolidge"      "C. Coolidge"     
[46] "H. Hoover"        "F. D. Roosevelt"  "F. D. Roosevelt"  "F. D. Roosevelt"  "H. S. Truman"    
[51] "H. S. Truman"     "D. D. Eisenhower" "J. F. Kennedy"    "L. B. Johnson"    "L. B. Johnson"   
[56] "R. M. Nixon"      "R. M. Nixon"      "G. R. Ford"       "J. Carter"        "R. Reagan"       
[61] "G. Bush"          "B. Clinton"       "G. W. Bush"       "B. Obama"         "D. J. Trump"  

哪个函数可以帮我修复 Z?如何将这两个对象(字符列表和矩阵)粘贴在一起?谢谢!

2 个答案:

答案 0 :(得分:2)

这是一个使用正则表达式的方案

# Abbreviate the given names in presidents$PRESIDENT to initials.
# Step 1 (PCRE, because of the lookbehind): delete the run of lowercase
# letters that follows a word-initial capital. The second alternative matches
# the trailing whitespace-plus-last-word and re-inserts it unchanged via "\\1".
initials_bare <- gsub(
  pattern = "(?<=\\b[A-Z])[a-z]+|(\\s+\\S+)$",
  replacement = "\\1",
  x = presidents$PRESIDENT,
  perl = TRUE
)
# Step 2: add "." after every word-initial capital that is directly followed
# by a whitespace character; the value is printed at top level.
gsub(pattern = "(\\b[A-Z])\\s", replacement = "\\1. ", x = initials_bare)
#[1] "G. Washington"    "J. Adams"         "T. Jefferson"     "T. Jefferson"     "J. Madison"       "J. Madison"      
#[7] "J. Madison"       "J. Madison"       "J. Monroe"        "J. Q. Adams"      "A. Jackson"       "A. Jackson"      
#[13] "M. V. Buren"      "W. H. Harrison"   "J. Tyler"         "J. K. Polk"       "Z. Taylor"        "M. Fillmore"     
#[19] "F. Pierce"        "F. Pierce"        "J. Buchanan"      "A. Lincoln"       "A. Lincoln"       "A. Johnson"      
#[25] "U. S. Grant"      "U. S. Grant"      "U. S. Grant"      "R. B. Hayes"      "J. A. Garfield"   "C. A. Arthur"    
#[31] "G. Cleveland"     "G. Cleveland"     "B. Harrison"      "G. Cleveland"     "W. McKinley"      "W. McKinley"     
#[37] "W. McKinley"      "T. Roosevelt"     "T. Roosevelt"     "W. H. Taft"       "W. H. Taft"       "W. Wilson"       
#[43] "W. G. Harding"    "C. Coolidge"      "C. Coolidge"      "H. Hoover"        "F. D. Roosevelt"  "F. D. Roosevelt" 
#[49] "F. D. Roosevelt"  "H. S. Truman"     "H. S. Truman"     "D. D. Eisenhower" "J. F. Kennedy"    "L. B. Johnson"   
#[55] "L. B. Johnson"    "R. M. Nixon"      "R. M. Nixon"      "G. R. Ford"       "J. Carter"        "R. Reagan"       
#[61] "G. Bush"          "B. Clinton"       "G. W. Bush"       "B. Obama"         "D. J. Trump"   

答案 1 :(得分:1)

看看这是不是你想要的结果。

# Abbreviate every word of each name except the last one to "<initial>.",
# leaving alone words that already contain a letter followed by "."
# (existing initials). Reads the character vector `name`; writes the
# character vector `result`, one element per name.
result <- strsplit(name, " ")
result <- lapply(result, function(s){
  # Positions of words that are still spelled out (no "letter + dot").
  i <- which(!grepl("[[:alpha:]]\\.", s))
  # Never abbreviate the final word, i.e. the surname. If nothing is left,
  # the assignment below is a no-op.
  i <- setdiff(i, length(s))
  s[i] <- sub("(^[[:alpha:]]).*", "\\1.", s[i])
  s
})
# vapply() always yields a character vector (character(0) for empty input),
# whereas sapply() would return an empty list in that case.
result <- vapply(result, paste, character(1), collapse = " ")