我正在尝试基于数据框中的两个参数创建一个相关图。但是,我对如何操纵数据框以获取所需的内容感到困惑。
这是我的数据框的结构:
# dput() output of the data frame in question: a 20-row tibble in long format.
# Columns (see .Names below): orgid, locid, stdate, sttime, charnam, val,
# valunit, swqs, WMA, year.
# `charnam` holds the parameter name ("Total dissolved solids" or
# "Specific conductance"), `val` holds the measured value for that parameter,
# and `valunit` holds its unit (mg/l, tons/ac ft, tons/day or uS/cm @25C).
# `stdate` is stored as a Date and `sttime` as an hms/difftime in seconds.
structure(list(orgid = c("USGS-NJ", "USGS-NJ", "USGS-NJ", "USGS-NJ",
"USGS-NJ", "USGS-NJ", "USGS-NJ", "USGS-NJ", "USGS-NJ", "USGS-NJ",
"USGS-NJ", "USGS-NJ", "USGS-NJ", "USGS-NJ", "USGS-NJ", "USGS-NJ",
"USGS-NJ", "USGS-NJ", "USGS-NJ", "USGS-NJ"), locid = c("USGS-01367785",
"USGS-01367785", "USGS-01455099", "USGS-01455099", "USGS-01440000",
"USGS-01440000", "USGS-01380100", "USGS-01380100", "USGS-01380100",
"USGS-01387700", "USGS-01387700", "USGS-01398000", "USGS-01398000",
"USGS-0140940950", "USGS-01466500", "USGS-01461880", "USGS-01461880",
"USGS-01445600", "USGS-01446400", "USGS-0140940950"), stdate = structure(c(16134,
16134, 16133, 16133, 16135, 16135, 16133, 16133, 16133, 16127,
16127, 16105, 16105, 16112, 15770, 15749, 15749, 15749, 15762,
16112), class = "Date"), sttime = structure(c(45000, 45000, 39600,
39600, 35040, 35040, 48000, 48000, 48000, 39600, 39600, 38700,
38700, 39600, 37200, 32400, 32400, 40500, 36000, 39600), class = c("hms",
"difftime"), units = "secs"), charnam = c("Total dissolved solids",
"Total dissolved solids", "Total dissolved solids", "Total dissolved solids",
"Total dissolved solids", "Total dissolved solids", "Total dissolved solids",
"Total dissolved solids", "Total dissolved solids", "Total dissolved solids",
"Total dissolved solids", "Total dissolved solids", "Total dissolved solids",
"Total dissolved solids", "Specific conductance", "Total dissolved solids",
"Specific conductance", "Specific conductance", "Specific conductance",
"Total dissolved solids"), val = c(0.21, 154, 0.43, 333, 0.16,
109, 12.1, 0.2, 143, 32, 0.05, 1.03, 711, 1.62, 31, 218, 391,
384, 478, 104), valunit = c("tons/ac ft", "mg/l", "tons/ac ft",
"mg/l", "tons/ac ft", "mg/l", "tons/day", "tons/ac ft", "mg/l",
"mg/l", "tons/ac ft", "tons/ac ft", "mg/l", "tons/day", "uS/cm @25C",
"mg/l", "uS/cm @25C", "uS/cm @25C", "uS/cm @25C", "mg/l"), swqs = c("FW2-NT",
"FW2-NT", "FW2-TP", "FW2-TP", "FW2-TM", "FW2-TM", "FW2-NT", "FW2-NT",
"FW2-NT", "FW2-TP", "FW2-TP", "FW2-NT", "FW2-NT", "PL", "FW1",
"FW2-TM", "FW2-TM", "FW2-NT", "FW2-TM", "PL"), WMA = c(2L, 2L,
1L, 1L, 1L, 1L, 6L, 6L, 6L, 3L, 3L, 8L, 8L, 14L, 19L, 11L, 11L,
1L, 1L, 14L), year = c(2014L, 2014L, 2014L, 2014L, 2014L, 2014L,
2014L, 2014L, 2014L, 2014L, 2014L, 2014L, 2014L, 2014L, 2013L,
2013L, 2013L, 2013L, 2013L, 2014L)), .Names = c("orgid", "locid",
"stdate", "sttime", "charnam", "val", "valunit", "swqs", "WMA",
"year"), row.names = c(NA, -20L), class = c("tbl_df", "tbl",
"data.frame"))
我想绘制总溶解固体(Total dissolved solids)与比电导(Specific conductance)的关系图。但是,这两个参数的值都存放在同一个名为 val 的列中。我是否需要重整数据框,让“总溶解固体”的值和“比电导”的值各自占一列?如果是这样,在参数名称位于 charnam 列、所有参数的值都位于 val 列的情况下,应该怎么做?我尝试过对数据框取子集,但没有成功。
我的代码如下:
# Attempted TDS vs. SC scatter plot, built from two separately filtered
# subsets. TDS_correlation_df, SC_correlation_df and correlation_theme are
# defined outside this snippet.
correlation_plot1<-ggplot() +
# x = "" maps x to a constant empty string, so these TDS values are not
# plotted against any SC value.
geom_point(data=TDS_correlation_df,aes(x="",y=val))+
# Same pattern with the axes swapped: x is val and y is the constant "".
geom_point(data=SC_correlation_df,aes(x=val,y=""))+
ggtitle("Statewide Total Dissolved Solids vs. Specific Conductance Correlation\n;1997-2018") +
xlab("SC(µS/cm)") + ylab("TDS(mg/L)")+
# NOTE(review): no layer in this snippet maps a colour aesthetic, so this
# manual colour scale presumably has nothing to label; confirm.
scale_color_manual("",
values = c("red"),
labels=c("Freshwater Aquatic Life Criteria for TDS = 500 mg/L"))+
correlation_theme+
# Added after correlation_theme, so this legend position is applied last.
theme(legend.position ="bottom")
TDS_correlation_df 和 SC_correlation_df 是原始数据集的子集,分别经过筛选,各自只保留对应的那一个参数。
答案 0 :(得分:0)
好的,这个办法比较笨拙,但我相信它可以满足您的需求。正如评论中所讨论的,
问题不在于您的 ggplot 代码,而在于您的数据:
# Recreate the sample data as `data`: a 20-row tibble in long format.
# Columns (see .Names below): orgid, locid, stdate, sttime, charnam, val,
# valunit, swqs, WMA, year.
# `charnam` holds the parameter name ("Total dissolved solids" or
# "Specific conductance"), `val` holds the measured value for that parameter,
# and `valunit` holds its unit (mg/l, tons/ac ft, tons/day or uS/cm @25C).
# `stdate` is stored as a Date and `sttime` as an hms/difftime in seconds.
data <- structure(list(orgid = c("USGS-NJ", "USGS-NJ", "USGS-NJ", "USGS-NJ",
"USGS-NJ", "USGS-NJ", "USGS-NJ", "USGS-NJ", "USGS-NJ", "USGS-NJ",
"USGS-NJ", "USGS-NJ", "USGS-NJ", "USGS-NJ", "USGS-NJ", "USGS-NJ",
"USGS-NJ", "USGS-NJ", "USGS-NJ", "USGS-NJ"), locid = c("USGS-01367785",
"USGS-01367785", "USGS-01455099", "USGS-01455099", "USGS-01440000",
"USGS-01440000", "USGS-01380100", "USGS-01380100", "USGS-01380100",
"USGS-01387700", "USGS-01387700", "USGS-01398000", "USGS-01398000",
"USGS-0140940950", "USGS-01466500", "USGS-01461880", "USGS-01461880",
"USGS-01445600", "USGS-01446400", "USGS-0140940950"), stdate = structure(c(16134,
16134, 16133, 16133, 16135, 16135, 16133, 16133, 16133, 16127,
16127, 16105, 16105, 16112, 15770, 15749, 15749, 15749, 15762,
16112), class = "Date"), sttime = structure(c(45000, 45000, 39600,
39600, 35040, 35040, 48000, 48000, 48000, 39600, 39600, 38700,
38700, 39600, 37200, 32400, 32400, 40500, 36000, 39600), class = c("hms",
"difftime"), units = "secs"), charnam = c("Total dissolved solids",
"Total dissolved solids", "Total dissolved solids", "Total dissolved solids",
"Total dissolved solids", "Total dissolved solids", "Total dissolved solids",
"Total dissolved solids", "Total dissolved solids", "Total dissolved solids",
"Total dissolved solids", "Total dissolved solids", "Total dissolved solids",
"Total dissolved solids", "Specific conductance", "Total dissolved solids",
"Specific conductance", "Specific conductance", "Specific conductance",
"Total dissolved solids"), val = c(0.21, 154, 0.43, 333, 0.16,
109, 12.1, 0.2, 143, 32, 0.05, 1.03, 711, 1.62, 31, 218, 391,
384, 478, 104), valunit = c("tons/ac ft", "mg/l", "tons/ac ft",
"mg/l", "tons/ac ft", "mg/l", "tons/day", "tons/ac ft", "mg/l",
"mg/l", "tons/ac ft", "tons/ac ft", "mg/l", "tons/day", "uS/cm @25C",
"mg/l", "uS/cm @25C", "uS/cm @25C", "uS/cm @25C", "mg/l"), swqs = c("FW2-NT",
"FW2-NT", "FW2-TP", "FW2-TP", "FW2-TM", "FW2-TM", "FW2-NT", "FW2-NT",
"FW2-NT", "FW2-TP", "FW2-TP", "FW2-NT", "FW2-NT", "PL", "FW1",
"FW2-TM", "FW2-TM", "FW2-NT", "FW2-TM", "PL"), WMA = c(2L, 2L,
1L, 1L, 1L, 1L, 6L, 6L, 6L, 3L, 3L, 8L, 8L, 14L, 19L, 11L, 11L,
1L, 1L, 14L), year = c(2014L, 2014L, 2014L, 2014L, 2014L, 2014L,
2014L, 2014L, 2014L, 2014L, 2014L, 2014L, 2014L, 2014L, 2013L,
2013L, 2013L, 2013L, 2013L, 2014L)), .Names = c("orgid", "locid",
"stdate", "sttime", "charnam", "val", "valunit", "swqs", "WMA",
"year"), row.names = c(NA, -20L), class = c("tbl_df", "tbl",
"data.frame"))
library(tidyverse)

# Goal: build one row per sampling event that has BOTH a total-dissolved-solids
# (TDS) and a specific-conductance (SC) measurement, then plot TDS against SC.

# Columns that together identify a single sampling event.
sample_keys <- c("orgid", "locid", "stdate", "sttime")

# Wide layout: one column per parameter named in `charnam`, filled from `val`.
# Every other column (including `valunit`) acts as a row identifier, so values
# reported in different units stay on separate rows.
data_tidy <- data %>%
  pivot_wider(names_from = charnam, values_from = val)

# SC measurements, keyed by sampling event.
specific_conductance <- data_tidy %>%
  filter(!is.na(`Specific conductance`)) %>%
  select(all_of(sample_keys), `Specific conductance`)

# TDS measurements, keyed by sampling event. `valunit` contains several units
# for TDS (mg/l, tons/ac ft, tons/day); only mg/l values are comparable on one
# axis, so the others are dropped.
total_dissolved_solids <- data_tidy %>%
  filter(!is.na(`Total dissolved solids`), valunit == "mg/l") %>%
  select(all_of(sample_keys), `Total dissolved solids`)

# Pair the two parameters by sampling event instead of by row position:
# matching on row numbers (with recycled SC values) would plot unrelated
# measurements against each other. inner_join() keeps only events where both
# parameters were measured.
combined <- total_dissolved_solids %>%
  inner_join(specific_conductance, by = sample_keys)

ggplot(
  combined,
  aes(x = `Specific conductance`, y = `Total dissolved solids`)
) +
  geom_point()