我的数据看起来像这样。
bankname bankid year totass corresbankname1 corresbankloc1 corresdepoin1 corresbankname2 corresbankloc2 corresdepoin2 corresbankname3 corresbankloc3 corresdepoin3
BankA 1 1881 244789 First Bank New York 7250.32 Third National Bank Philadelphia 20218.2 Commercial Bank Philadelphia 29513.4
BankB 2 1881 195755 National Bank Pittsburgh 10243.6 Union Trust Company New York 1851.51 NA NA NA
Bankc 3 1881 107736 Mechanics' Bank New York 13357.8 Wyoming Bank Wilkes-Barre 17761.2 NA NA NA
BankD 4 1881 170600 Commonwealth Bank Philadelphia 3.35 Seventh National Bank Philadelphia 2 NA NA NA
BankE 5 1881 320000 National Bank New York 351266 Mechanics' Bank New York 314012 National Park Bank New York 206580
这些数据可以通过以下代码重现:
# Reproducible example data: one row per bank, with up to three
# correspondent-bank slots (name / location / deposit amount). Slots a bank
# does not use are NA.
bankname <- c(
  "The Anchor Savings Bank of Pittsburgh",
  "The Arsenal Bank",
  "The Ashley Savings Bank",
  "The Bank of America of Philadelphia",
  "The Bank of Pittsburgh"
)
bankid <- c(1, 2, 3, 4, 5)
year <- c(1881, 1881, 1881, 1881, 1881)
totass <- c(244789, 195755, 107736, 170600, 32000000)

# Correspondent slot 1
corresbankname1 <- c(
  "First National Bank",
  "National Bank of Commerce",
  "Mechanics' National Bank",
  "Commonwealth National Bank",
  "National Bank of Commerce"
)
corresbankloc1 <- c(
  "Philadelphia", "Pittsburgh", "New York", "Philadelphia", "New York"
)
corresdepoin1 <- c(7250.32, 10243.6, 13357.8, 3.35, 351266)

# Correspondent slot 2
corresbankname2 <- c(
  "Third National Bank",
  "Union Trust Company",
  "Wyoming National Bank",
  "Seventh National Bank",
  "Mechanics' National Bank"
)
corresbankloc2 <- c(
  "New York", "New York", "Wilkes-Barre", "Philadelphia", "New York"
)
corresdepoin2 <- c(20218.2, 1851.51, 17761.2, 2, 314012)

# Correspondent slot 3 (only the first and last bank use it)
corresbankname3 <- c("Commercial National Bank", NA, NA, NA, "National Park Bank")
corresbankloc3 <- c("Philadelphia", NA, NA, NA, "New York")
corresdepoin3 <- c(29513.4, NA, NA, NA, 206580)

bankdata <- data.frame(
  bankname, bankid, year, totass,
  corresbankname1, corresbankloc1, corresdepoin1,
  corresbankname2, corresbankloc2, corresdepoin2,
  corresbankname3, corresbankloc3, corresdepoin3
)
此数据集显示每家银行在其他银行(corresbankname)及其所在地(corresbankloc)投资的金额(corresdepoin)。我有43个corresbankname,corresbankloc和corresdepoin变量。
由于这些银行会投资于同一城市的多家银行,我想知道每个城市的投资总额。因此,我想生成一个名为 "total_New York" 的新列变量:当 corresbankloc 是 New York 时,把对应的 corresdepoin 金额加总。如何对这43组变量进行循环?
例如,BankE 在位于 New York(corresbankloc1)的 National Bank(corresbankname1)有 351266 美元(corresdepoin1),在 New York 的 Mechanics' Bank 有 314012 美元,在 New York 的 National Park Bank 有 206580 美元。我想要一个名为 "纽约总储蓄" 的新列,显示投资于纽约各银行的总额 871858 美元。因此,我需要的是一个条件语句:它循环遍历各个位置列(corresbankloc),检查其是否为 New York(或其他城市),然后对 "corresdepoin" 中对应的值求和,从而得到每个 "bankname" 在该城市的投资总额。
另外,在 Stata 中,如果我想对多个城市做同样的事,我会定义一个局部宏:
local cities "New York" "Philadelphia" "Pittsburgh"
然后循环遍历它们。R 中有类似的功能吗?
提前谢谢。
答案 0 :(得分:0)
另一个选项是用 reshape 重塑数据集。借助 dplyr,您可以创建一个函数,输出指定的部分城市或数据中全部城市的汇总结果。我不知道这是否有效。
library(dplyr)
#' Total correspondent deposits per bank and correspondent city
#'
#' Reshapes the wide `corresdepoin*` / `corresbankloc*` column pairs into long
#' form (one row per bank and correspondent slot), drops slots whose location
#' is `NA`, optionally keeps only the requested cities, and sums
#' `corresdepoin` within each group.
#'
#' @param data Wide data frame with a `bankname` column plus columns whose
#'   names contain "corresdepoin" and "corresbankloc". The two column sets are
#'   paired by position, so they are assumed to appear in matching order.
#' @param city Character vector of city names to keep. Only used (and then
#'   required) when `allcity = FALSE`.
#' @param byloc If `TRUE`, totals are split by `Bankloc` (the slot index
#'   created by `reshape()`) in addition to bank and city; if `FALSE`, one
#'   total per bank and city is returned.
#' @param allcity If `TRUE`, every city in the data is summarised and `city`
#'   is ignored.
#' @return The result of `dplyr::summarise()` with columns `bankname`,
#'   `corresbankloc` (prefixed with "Totalbylocation_"), `Bankloc` (only when
#'   `byloc = TRUE`) and `Total`.
fun1 <- function(data, city = NULL, byloc = TRUE, allcity = TRUE) {
  depo_cols <- grep("corresdepoin", colnames(data))
  loc_cols <- grep("corresbankloc", colnames(data))
  # reshape() pairs the two column sets by position, so they must line up.
  stopifnot(length(depo_cols) == length(loc_cols))

  # NOTE(review): idvar = "bankname" assumes one row per bank name; confirm
  # this holds if the real data has several years per bank.
  long <- reshape(
    data,
    idvar = "bankname",
    varying = list(depo_cols, loc_cols),
    timevar = "Bankloc",
    direction = "long",
    v.names = c("corresdepoin", "corresbankloc")
  )
  long <- long[!is.na(long$corresbankloc), ]
  # seq_len() instead of 1:nrow() so a zero-row result does not error.
  row.names(long) <- seq_len(nrow(long))

  if (!allcity) {
    if (is.null(city)) {
      stop("`city` must be supplied when `allcity = FALSE`.", call. = FALSE)
    }
    long <- long[long$corresbankloc %in% city, ]
  }

  # Build the label on the plain character vector before grouping. Pasting a
  # one-column tibble (`x[, 2]`) after summarise() would deparse the whole
  # column into a single string instead of labelling each row.
  long$corresbankloc <- paste("Totalbylocation", long$corresbankloc, sep = "_")

  grouped <- if (byloc) {
    long %>% group_by(bankname, corresbankloc, Bankloc)
  } else {
    long %>% group_by(bankname, corresbankloc)
  }
  grouped %>%
    summarise(Total = sum(corresdepoin, na.rm = TRUE))
}
# New York only, split by correspondent slot (Bankloc).
fun1(bankdata, "New York", byloc = TRUE, allcity = FALSE) %>%
  as.data.frame()
# Output as posted:
#                                bankname            corresbankloc Bankloc     Total
# 1 The Anchor Savings Bank of Pittsburgh Totalbylocation_New York       2  20218.20
# 2                      The Arsenal Bank Totalbylocation_New York       2   1851.51
# 3               The Ashley Savings Bank Totalbylocation_New York       1  13357.80
# 4                The Bank of Pittsburgh Totalbylocation_New York       1 351266.00
# 5                The Bank of Pittsburgh Totalbylocation_New York       2 314012.00
# 6                The Bank of Pittsburgh Totalbylocation_New York       3 206580.00

# New York only, one total per bank.
fun1(bankdata, "New York", byloc = FALSE, allcity = FALSE) %>%
  as.data.frame()
# Output as posted:
#                                bankname            corresbankloc     Total
# 1 The Anchor Savings Bank of Pittsburgh Totalbylocation_New York  20218.20
# 2                      The Arsenal Bank Totalbylocation_New York   1851.51
# 3               The Ashley Savings Bank Totalbylocation_New York  13357.80
# 4                The Bank of Pittsburgh Totalbylocation_New York 871858.00

# Several cities at once, one total per bank and city.
fun1(bankdata, c("New York", "Pittsburgh"), byloc = FALSE, allcity = FALSE) %>%
  as.data.frame()

# Every city in the data, split by correspondent slot.
fun1(bankdata, byloc = TRUE, allcity = TRUE) %>%
  as.data.frame()