用R中的if语句对列求和

时间:2014-09-15 23:52:34

标签: r rstudio

我的数据看起来像这样。

bankname    bankid  year    totass  corresbankname1 corresbankloc1  corresdepoin1   corresbankname2 corresbankloc2  corresdepoin2   corresbankname3 corresbankloc3  corresdepoin3   
BankA   1   1881    244789  First Bank  New York    7250.32 Third National Bank Philadelphia    20218.2 Commercial Bank Philadelphia    29513.4   

BankB   2   1881    195755  National Bank Pittsburgh    10243.6 Union Trust Company New York    1851.51 NA  NA  NA   

Bankc   3   1881    107736  Mechanics' Bank New York    13357.8 Wyoming Bank    Wilkes-Barre    17761.2 NA  NA  NA      

BankD   4   1881    170600  Commonwealth Bank   Philadelphia    3.35    Seventh National Bank   Philadelphia    2   NA  NA  NA  

BankE   5   1881    320000  National Bank   New York    351266  Mechanics'  Bank    New York    314012  National Park Bank  New York    206580

这些数据可以用下面的代码复现:

# Reproducible example data: one row per bank, each with up to three
# correspondent banks described by a name, a location and a deposit amount.
# Banks with fewer than three correspondents carry NA in the unused slot.

# Bank identity and balance-sheet size.
bankname <- c(
  "The Anchor Savings Bank of Pittsburgh",
  "The Arsenal Bank",
  "The Ashley Savings Bank",
  "The Bank of America of Philadelphia",
  "The Bank of Pittsburgh"
)
bankid <- as.numeric(1:5)
year <- rep(1881, 5)
totass <- c(244789, 195755, 107736, 170600, 32000000)

# First correspondent slot.
corresbankname1 <- c(
  "First National Bank",
  "National Bank of Commerce",
  "Mechanics' National Bank",
  "Commonwealth National Bank",
  "National Bank of Commerce"
)
corresbankloc1 <- c(
  "Philadelphia", "Pittsburgh", "New York", "Philadelphia", "New York"
)
corresdepoin1 <- c(7250.32, 10243.6, 13357.8, 3.35, 351266)

# Second correspondent slot.
corresbankname2 <- c(
  "Third National Bank",
  "Union Trust Company",
  "Wyoming National Bank",
  "Seventh National Bank",
  "Mechanics' National Bank"
)
corresbankloc2 <- c(
  "New York", "New York", "Wilkes-Barre", "Philadelphia", "New York"
)
corresdepoin2 <- c(20218.2, 1851.51, 17761.2, 2, 314012)

# Third correspondent slot (only the first and last bank use it).
corresbankname3 <- c(
  "Commercial National Bank",
  NA_character_, NA_character_, NA_character_,
  "National Park Bank"
)
corresbankloc3 <- c(
  "Philadelphia",
  NA_character_, NA_character_, NA_character_,
  "New York"
)
corresdepoin3 <- c(29513.4, NA_real_, NA_real_, NA_real_, 206580)

# Assemble the wide data frame; column names come from the vector names.
bankdata <- data.frame(
  bankname, bankid, year, totass,
  corresbankname1, corresbankloc1, corresdepoin1,
  corresbankname2, corresbankloc2, corresdepoin2,
  corresbankname3, corresbankloc3, corresdepoin3
)

此数据集显示每家银行在其他银行(corresbankname)及其所在地(corresbankloc)投资的金额(corresdepoin)。我有43个corresbankname,corresbankloc和corresdepoin变量。

由于这些银行会投资于同一城市的多家银行,我想知道它们在每个城市的投资总额。因此,我想生成一个名为 "total_New York" 的新列变量:当 corresbankloc 为纽约(New York)时,对相应 corresdepoin 所示的金额求和。如何遍历这 43 组变量?

例如,BankE 在纽约(corresbankloc1)的 National Bank(corresbankname1)存有 351266 美元(corresdepoin1),在纽约的 Mechanics' Bank 存有 314012 美元,在纽约的 National Park Bank 存有 206580 美元。我想要一个名为 "纽约总储蓄" 的新列,显示该银行在位于纽约的各家银行的投资总额,即 871858 美元。因此,我需要一个条件语句:遍历各个 corresbankloc 列,检查其取值是纽约还是其他城市,然后对 "corresdepoin" 中对应的值求和,从而得到每个 "bankname" 在该城市的投资总额。

另外,在 Stata 中,如果我想对多个城市做同样的事,我会定义一个局部宏
local cities "New York" "Philadelphia" "Pittsburgh"
并循环遍历它们。R 中有类似的功能吗?

提前谢谢。

1 个答案:

答案 0 :(得分:0)

另一种做法是先用 reshape 把数据集转换为长格式,再用 dplyr 进行汇总。您可以编写一个函数,既能输出指定的部分城市,也能输出数据中的全部城市。我不知道这是否有效。

library(dplyr)
# Sum correspondent deposits per bank and correspondent city.
#
# The wide `corresdepoin*` / `corresbankloc*` columns are reshaped to long
# form (one row per bank and correspondent slot), rows without a location are
# dropped, and the deposits are summed with dplyr.
#
# Args:
#   data:    data frame with a `bankname` column plus matching
#            `corresdepoin*` and `corresbankloc*` columns.
#   city:    character vector of cities to keep. Only used, and only
#            required, when `allcity = FALSE`.
#   byloc:   if TRUE, keep one row per correspondent slot (`Bankloc`);
#            if FALSE, collapse to one total per bank and city.
#   allcity: if TRUE, summarise every city in the data and ignore `city`.
#
# Returns:
#   The summarised table with columns `bankname`, `corresbankloc` (prefixed
#   with "Totalbylocation_"), `Bankloc` (only when `byloc = TRUE`) and
#   `Total`. No ungroup() is applied, so the result keeps whatever grouping
#   summarise() leaves behind.
fun1 <- function(data, city, byloc = TRUE, allcity = TRUE) {
  if (!allcity && missing(city)) {
    stop("`city` must be supplied when `allcity = FALSE`.", call. = FALSE)
  }

  col_names <- colnames(data)
  data1 <- reshape(
    data,
    idvar = "bankname",
    varying = list(
      grep("corresdepoin", col_names),
      grep("corresbankloc", col_names)
    ),
    timevar = "Bankloc",
    direction = "long",
    v.names = c("corresdepoin", "corresbankloc")
  )

  # Drop unused correspondent slots, then reset to automatic row names.
  # Assigning NULL (rather than 1:nrow(data1)) also works for zero rows.
  data1 <- data1[!is.na(data1$corresbankloc), ]
  row.names(data1) <- NULL

  # Group, total the deposits, and label the location column.
  summarise_totals <- function(rows, grouploc = TRUE) {
    grouped <- if (grouploc) {
      group_by(rows, bankname, corresbankloc, Bankloc)
    } else {
      group_by(rows, bankname, corresbankloc)
    }
    totals <- summarise(grouped, Total = sum(corresdepoin, na.rm = TRUE))

    # Extract the column with `[[`: on a tibble `totals[, 2]` is a one-column
    # tibble, which paste() would deparse into a single string. sprintf()
    # also returns zero-length output for zero-length input, unlike paste().
    totals[["corresbankloc"]] <- sprintf(
      "Totalbylocation_%s",
      as.character(totals[["corresbankloc"]])
    )
    totals
  }

  # Base subsetting keeps `city` bound to the argument even if `data1`
  # happens to contain a column named `city`.
  selected <- if (allcity) {
    data1
  } else {
    data1[data1$corresbankloc %in% city, , drop = FALSE]
  }

  summarise_totals(selected, byloc)
}

 # New York only, one row per bank and correspondent slot (Bankloc).
 as.data.frame(
   fun1(bankdata, city = "New York", byloc = TRUE, allcity = FALSE)
 )
 # Printed result:
 #                                bankname            corresbankloc Bankloc     Total
 # 1 The Anchor Savings Bank of Pittsburgh Totalbylocation_New York       2  20218.20
 # 2                      The Arsenal Bank Totalbylocation_New York       2   1851.51
 # 3               The Ashley Savings Bank Totalbylocation_New York       1  13357.80
 # 4                The Bank of Pittsburgh Totalbylocation_New York       1 351266.00
 # 5                The Bank of Pittsburgh Totalbylocation_New York       2 314012.00
 # 6                The Bank of Pittsburgh Totalbylocation_New York       3 206580.00

 # New York only, collapsed to a single total per bank.
 as.data.frame(
   fun1(bankdata, city = "New York", byloc = FALSE, allcity = FALSE)
 )
 # Printed result:
 #                                bankname            corresbankloc     Total
 # 1 The Anchor Savings Bank of Pittsburgh Totalbylocation_New York  20218.20
 # 2                      The Arsenal Bank Totalbylocation_New York   1851.51
 # 3               The Ashley Savings Bank Totalbylocation_New York  13357.80
 # 4                The Bank of Pittsburgh Totalbylocation_New York 871858.00

 # Several cities at once, one total per bank and city.
 as.data.frame(
   fun1(
     bankdata,
     city = c("New York", "Pittsburgh"),
     byloc = FALSE,
     allcity = FALSE
   )
 )

 # Every city in the data, one row per bank and correspondent slot.
 as.data.frame(
   fun1(bankdata, byloc = TRUE, allcity = TRUE)
 )