我试图在数据框中创建一个新列,该列基于另一列中的特定唯一值。我已经通过在mutate中使用case_when函数成功完成了此操作。但是,我想使用str_detect,因为我有大量数据,并且我不想键入每个唯一值,因为大多数值都是相同的,只是字符串中的最后几个数字有所不同。当我尝试在case_when中使用str_detect时遇到错误。有人知道我该如何实现吗?
数据的dput输出:
dput(head(HUC_wq_summary,10))
structure(list(locid = c("21NJDEP1-01464380", "21NJDEP1-01464380",
"21NJDEP1-01464380", "21NJDEP1-01464380", "21NJDEP1-01464380",
"21NJDEP1-01464380", "21NJDEP1-01464380", "21NJDEP1-01464380",
"21NJDEP1-01464380", "21NJDEP1-01464380"), parameter = c("Alkalinity, Carbonate as CaCO3",
"Alkalinity, Carbonate as CaCO3", "Alkalinity, Carbonate as CaCO3",
"Alkalinity, Carbonate as CaCO3", "Alkalinity, Carbonate as CaCO3",
"Ammonia-nitrogen as N", "Ammonia-nitrogen as N", "Ammonia-nitrogen as N",
"Ammonia-nitrogen as N", "Barometric pressure"), ActivityStartDate = structure(c(13902,
13986, 14075, 14179, 14271, 13902, 13986, 14075, 14271, 13902
), class = "Date"), ActivityEndDate = c(NA_character_, NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_, NA_character_,
NA_character_, NA_character_, NA_character_), Samples = c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), MinValue = c(26, 44, 40,
28, 25, 0.12, 0.01, 0.05, 0.13, 760), MaxValue = c(26, 44, 40,
28, 25, 0.12, 0.01, 0.05, 0.13, 760), Average = c(26, 44, 40,
28, 25, 0.12, 0.01, 0.05, 0.13, 760), HUC14 = c("HUC02040201040070",
"HUC02040201040070", "HUC02040201040070", "HUC02040201040070",
"HUC02040201040070", "HUC02040201040070", "HUC02040201040070",
"HUC02040201040070", "HUC02040201040070", "HUC02040201040070"
), Trib = c("HUC02040201040070", "HUC02040201040070", "HUC02040201040070",
"HUC02040201040070", "HUC02040201040070", "HUC02040201040070",
"HUC02040201040070", "HUC02040201040070", "HUC02040201040070",
"HUC02040201040070")), .Names = c("locid", "parameter", "ActivityStartDate",
"ActivityEndDate", "Samples", "MinValue", "MaxValue", "Average",
"HUC14", "Trib"), row.names = c(NA, -10L), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"), vars = c("locid", "parameter",
"ActivityStartDate"), drop = TRUE, indices = list(0L, 1L, 2L,
3L, 4L, 5L, 6L, 7L, 8L, 9L), group_sizes = c(1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L), biggest_group_size = 1L, labels = structure(list(
locid = c("21NJDEP1-01464380", "21NJDEP1-01464380", "21NJDEP1-01464380",
"21NJDEP1-01464380", "21NJDEP1-01464380", "21NJDEP1-01464380",
"21NJDEP1-01464380", "21NJDEP1-01464380", "21NJDEP1-01464380",
"21NJDEP1-01464380"), parameter = c("Alkalinity, Carbonate as CaCO3",
"Alkalinity, Carbonate as CaCO3", "Alkalinity, Carbonate as CaCO3",
"Alkalinity, Carbonate as CaCO3", "Alkalinity, Carbonate as CaCO3",
"Ammonia-nitrogen as N", "Ammonia-nitrogen as N", "Ammonia-nitrogen as N",
"Ammonia-nitrogen as N", "Barometric pressure"), ActivityStartDate = structure(c(13902,
13986, 14075, 14179, 14271, 13902, 13986, 14075, 14271, 13902
), class = "Date")), row.names = c(NA, -10L), class = "data.frame", vars = c("locid",
"parameter", "ActivityStartDate"), drop = TRUE, .Names = c("locid",
"parameter", "ActivityStartDate")))
代码:
# Question's attempt: recode HUC14 into a tributary name stored in Trib, with
# one exact-match case_when() branch per code and a TRUE fallback that is
# meant to keep the HUC14 value for codes not listed.
HUC_wq_summary<-HUC_wq_summary%>%
mutate(Trib=case_when(HUC14 == "HUC02040202100060" ~ "Pennsauken",
HUC14 == "HUC02040202100050" ~ "Pennsauken",
HUC14 == "HUC02040202100020" ~ "Pennsauken",
HUC14 == "HUC02040202100030" ~ "Pennsauken",
HUC14 == "HUC02040202100040" ~ "Pennsauken",
HUC14 == "HUC02040202100010" ~ "Pennsauken",
HUC14 == "HUC02040202150010" ~ "Raccoon",
HUC14 == "HUC02040202150060" ~ "Raccoon",
HUC14 == "HUC02040202150020" ~ "Raccoon",
HUC14 == "HUC02040202150040" ~ "Raccoon",
HUC14 == "HUC02040202150050" ~ "Raccoon",
HUC14 == "HUC02040202150030" ~ "Raccoon",
HUC14 == "HUC02040202080040"~ "Rancocas",
HUC14 == "HUC02040202080030"~ "Rancocas",
# NOTE(review): this is the only str_detect() branch, i.e. the one the
# question reports as failing. The closing parenthesis of str_detect() comes
# after "Rancocas", so the whole formula `"HUC020402020600" ~ "Rancocas"` is
# passed as the pattern argument, and the result is then compared to HUC14
# with `==` instead of being the left-hand side of a `~` branch. The intended
# branch is presumably: str_detect(HUC14, "HUC020402020600") ~ "Rancocas".
HUC14 == str_detect(HUC14,"HUC020402020600"~ "Rancocas"),TRUE ~ HUC14))
如您所见,大多数HUC14字符串都是相同的,只是每个字符串都相差几个数字。
答案 0 :(得分:3)
# Derive Trib from digit patterns found in HUC14. Branches are tried in
# order; a code matching none of the patterns keeps its HUC14 value.
HUC_wq_summary <- HUC_wq_summary %>%
  mutate(
    Trib = case_when(
      # "000", then one digit from 1 to 6, then "0"
      str_detect(HUC14, ".*000[1-6]0") ~ "Pennsauken",
      # "500", then one digit from 1 to 6, then "0"
      str_detect(HUC14, ".*500[1-6]0") ~ "Raccoon",
      # "800", then 3 or 4, then "0"
      str_detect(HUC14, ".*800[34]0") ~ "Rancocas",
      TRUE ~ HUC14
    )
  )
答案 1 :(得分:1)
function Continue({show, onContinue}) {
return(<div className="row continue">
{ show ? <div className="col-11">
<button class="btn btn-primary btn-lg float-right" onClick= {onContinue}>Continue</button>
</div>
: null }
</div>);
}
基本上,您只保留 HUC14
const item_1 = {
a: 'a',
b: 'b'
};
// Serialize item_1 once, then POST the resulting string as the request body.
const jsonPayload = JSON.stringify(item_1);
fetch('/some_api/endpoint', {
  method: 'POST',
  // NOTE(review): the registered media type for JSON is "application/json";
  // confirm the endpoint really expects "text/json" before changing it.
  headers: {"Content-Type": "text/json"},
  body: jsonPayload
});
变量的数字并使其为数字。然后创建间隔,第一个间隔为
# Strip every non-digit character from HUC14, convert the remainder to a
# number, and bin it into tributaries. The result is a factor stored in Trib.
HUC_wq_summary$Trib <- cut(
  as.numeric(gsub("\\D", "", HUC_wq_summary$HUC14)), # keep only digits
  # Each break is the first code of a tributary; Inf closes the last bin.
  breaks = c(02040201040030, 02040202080030, 02040202100010, 02040202150010, Inf),
  # Labels follow break order: 0208... Rancocas, 0210... Pennsauken,
  # 0215... Raccoon (the mapping used in the question's case_when).
  labels = c("BlacksCrosswicks", "Rancocas", "Pennsauken", "Raccoon"),
  # Intervals are [lower, upper): a code equal to a break starts the next
  # bin. This is what closes intervals on the left; include.lowest does not.
  right = FALSE,
  # With right = FALSE this only closes the last interval at its upper end.
  include.lowest = TRUE
)
从 02040201040030 到 02040202080030,标记为 BlacksCrosswicks,等等。