早上好,我创建了以下R代码:
setwd("xxx")
library(reshape)
##Insert needed year
url <- "./Quarterly/1990_qtrly.csv"
##Writes data in R with applicable columns
qtrly_data <- read.csv(url, header = TRUE, sep = ",", quote="\"", dec=".", na.strings=" ", skip=0)
relevant_cols <- c("area_fips", "industry_code", "own_code", "agglvl_code", "year", "qtr")
overall <- c(relevant_cols, colnames(qtrly_data)[8:16])
lq <- c(relevant_cols, colnames(qtrly_data)[17:25])
oty <- c(relevant_cols, colnames(qtrly_data)[18:42])
types <- c("overall", "lq", "oty")
overallx <- colnames(qtrly_data)[9:16]
lqx <- colnames(qtrly_data)[18:25]
otyx <- colnames(qtrly_data)[seq(27,42,2)]
###Adding in the disclosure codes from each section
disc_codes <- c("disclosure_code", "lq_disclosure_code", "oty_disclosure_code")
cols_list = list(overall, lq, oty)
denom_list = list(overallx, lqx, otyx)
##Uses a two-loop peice of code to go through data denominations and categories, while melting it into the correct format
for (j in 1:length(types))
{
cat("Working on type: " , types[j], "\n")
these_denominations <- denom_list[[j]]
type_data <- qtrly_data[ , cols_list[[j]] ]
QCEW_County <- melt(type_data, id=c(relevant_cols, disc_codes[j]))
colnames(QCEW_County) <- c(relevant_cols, "disclosure_code", "text_denomination", "value")
Data_Cat <- j
for (k in 1:length(these_denominations))
{
cat("Working on type: " , types[j], "and denomination: ", these_denominations[k], "\n")
QCEW_County_Denominated <- QCEW_County[QCEW_County[, "text_denomination"] == these_denominations[k], ]
QCEW_County_Denominated$disclosure_code <- ifelse(QCEW_County_Denominated$disclosure_code == "", 0, 1)
Data_Denom <- k
QCEW_County_Denominated <- cbind(QCEW_County_Denominated, Data_Cat, Data_Denom)
QCEW_County_Denominated$Source_ID <- 1
QCEW_County_Denominated$text_denomination <- NULL
colnames(QCEW_County_Denominated) <- NULL
###Actually writes the txt file to the QCEW folder
write.table(QCEW_County_Denominated, file="C:\\Users\\jjackson\\Downloads\\QCEW\\1990_test.txt", append=TRUE, quote=FALSE, sep=',', row.names=FALSE)
}
}
现在,我需要摆脱一些事情,即我的QCEW_County_D指定数据帧中的所有行,其中&#34; area_fips&#34;列以字符&#34; C&#34;开头,在同一列中,还有以美国开头的代码,我想用0代替。最后,我还有&#34; industry_code&#34 ;在我的最终数据框中有3个值需要替换的列。 31-33,31,44-45,44,48-49,48。我知道这是一项艰巨的任务。我自己慢慢搞清楚,但是如果有人能在我自己的方法中找到正确的方向,我会非常感激。 R中的条件语句看起来像是我的致命弱点,因为它始终与我的语法与其他统计软件包的不同之处相混淆。 谢谢你,祝你有个愉快的一天。
答案 0 :(得分:0)
您可以使用regex
和子集删除和重新编码数据。
使用grepl
,您可以选择 area_fips 列中以DON开头的行。
QCEW_County_Denominated <- QCEW_County_Denominated[!grepl("^C", QCEW_County_Denominated$area_fips), ]
使用gsub
,您可以将0替换为 area_fips 列中以0开头的值。
QCEW_County_Denominated$area_fips <- as.numeric(gsub("^US", 0, QCEW_County_Denominated$area_fips))
最后,使用子集,您可以替换 industry_code 中的值。
QCEW_County_Denominated$industry_code[QCEW_County_Denominated$industry_code == "31-33"] <- 31
QCEW_County_Denominated$industry_code[QCEW_County_Denominated$industry_code == "44-45"] <- 44
QCEW_County_Denominated$industry_code[QCEW_County_Denominated$industry_code == "48-49"] <- 48