我有一个包含多个行和列的数据集。下面是一些行和列的快照。
ID Date Gender Age Col1 Col2 Col3 Col4 Col5 Col6 Col7 Col8
10 2015-10-14 F 68 345.50 884.2 008.69 202.18 189.8 435.2 084.7 757
93 2002-07-22 F 87 242.80 710.9 345.50 884.2 008.69 202.18 189.8 435.2
14 2004-07-28 M 92 084.7 757 242.80 710.9 427.2 530.10 567.89 227.9
41 2011-02-24 M 39 714.0 084.7 757 242.80 710.9 427.2 530.10 567.89
64 2002-03-14 F 39 227.9 714.0 V58.49 906.7 800.35 V88.0 349.31 289.84
22 2015-11-21 F 68 324.0 V65.44 411.8 200.41 187.7 E869.3 041.04 170.4
36 2003-09-17 F 75 389.1 176.3 788.37 E936.3 277.82 812.12 E816.7 663.90
11 2000-10-07 M 74 716.90 396.3 482.1 E816.7 663.90 716.90 396.3 482.1
45 2001-07-14 F 31 614.2 945.44 799.4 864.05 371.31 268 626.2 780.72
60 1999-02-23 M 45 674 645.2 006.5 V68.2 V67.00 665.24 434.00 914.3
我有另一个数据集,它是一个查找表,其中包含 Col1、Col2、Col3、Col4、Col5、Col6、Col7 和 Col8 中各代码的简短说明,如下所示:
Code Short_Description
345.50 interStellar
884.2 indispensable
008.69 hallucination
202.18 flow
189.8 categorizing
435.2 choppiness
084.7 chieftain
757 substantiating
V58.49 unbridled
V88.0 polish
324.0 stumble
V65.44 hoopster
411.8 overtrimmed
E869.3 overbrutalizing
041.04 choric
E936.3 busera
277.82 subdelegating
E816.7 baton
663.90 Space
我的问题是如何将第一个数据集中的代码与第二个查找数据集中的代码进行匹配,并将匹配的代码替换为相应的简短描述?
下面的预期输出显示:代码 345.50 被匹配并替换为 interStellar,代码 V58.49 被匹配并替换为 unbridled。
我希望输出中所有能匹配的代码都被替换为相应的说明。我知道如何使用 if-then-else 来实现,但效率非常低,我认为应该有更简单的方法。非常感谢任何帮助,提前谢谢。
ID Date Gender Age Col1 Col2 Col3 Col4 Col5 Col6 Col7 Col8
10 2015-10-14 F 68 interStellar 884.2 008.69 202.18 189.8 435.2 084.7 757
93 2002-07-22 F 87 242.80 710.9 interStellar 884.2 008.69 202.18 189.8 435.2
14 2004-07-28 M 92 084.7 757 242.80 710.9 427.2 530.10 567.89 227.9
41 2011-02-24 M 39 714.0 084.7 757 242.80 710.9 427.2 530.10 567.89
64 2002-03-14 F 39 227.9 714.0 unbridled 906.7 800.35 V88.0 349.31 289.84
22 2015-11-21 F 68 324.0 hoopster 411.8 200.41 187.7 E869.3 041.04 170.4
36 2003-09-17 F 75 389.1 176.3 788.37 E936.3 277.82 812.12 baton 663.90
11 2000-10-07 M 74 716.90 396.3 482.1 baton 663.90 716.90 396.3 482.1
45 2001-07-14 F 31 614.2 945.44 799.4 864.05 371.31 268 626.2 780.72
60 1999-02-23 M 45 674 645.2 006.5 V68.2 V67.00 665.24 434.00 914.3
==================== 本例中使用的可重复数据集========================
# Reproducible sample of the main dataset, written out as a dput()-style
# literal. Col1 and Col8 are stored as double while Col2-Col7 are character;
# the `spec` attribute mirrors those column types as collector classes.
df1 <- structure(
  list(
    ID = c(10L, 93L, 14L, 41L, 64L, 22L, 36L, 11L, 45L, 60L),
    Date = c(
      "10/14/2015", "7/22/2002", "7/28/2004", "2/24/2011", "3/14/2002",
      "11/21/2015", "9/17/2003", "10/7/2000", "7/14/2001", "2/23/1999"
    ),
    Gender = c("F", "F", "M", "M", "F", "F", "F", "M", "F", "M"),
    Age = c(68L, 87L, 92L, 39L, 39L, 68L, 75L, 74L, 31L, 45L),
    Col1 = c(345.5, 242.8, 84.7, 714, 227.9, 324, 389.1, 716.9, 614.2, 674),
    Col2 = c(
      "884.2", "710.9", "757", "84.7", "714",
      "V65.44", "176.3", "396.3", "945.44", "645.2"
    ),
    Col3 = c(
      "8.69", "345.5", "242.8", "757", "V58.49",
      "411.8", "788.37", "482.1", "799.4", "6.5"
    ),
    Col4 = c(
      "202.18", "884.2", "710.9", "242.8", "906.7",
      "200.41", "E936.3", "E816.7", "864.05", "V68.2"
    ),
    Col5 = c(
      "189.8", "8.69", "427.2", "710.9", "800.35",
      "187.7", "277.82", "663.9", "371.31", "V67.00"
    ),
    Col6 = c(
      "435.2", "202.18", "530.1", "427.2", "V88.0",
      "E869.3", "812.12", "716.9", "268", "665.24"
    ),
    Col7 = c(
      "84.7", "189.8", "567.89", "530.1", "349.31",
      "41.04", "E816.7", "396.3", "626.2", "434"
    ),
    Col8 = c(
      757, 435.2, 227.9, 567.89, 289.84,
      170.4, 663.9, 482.1, 780.72, 914.3
    )
  ),
  class = c("tbl_df", "tbl", "data.frame"),
  row.names = c(NA, -10L),
  .Names = c(
    "ID", "Date", "Gender", "Age",
    "Col1", "Col2", "Col3", "Col4", "Col5", "Col6", "Col7", "Col8"
  ),
  # Column specification: one collector per column plus a default collector.
  spec = structure(
    list(
      cols = structure(
        list(
          ID = structure(
            list(),
            class = c("collector_integer", "collector")
          ),
          Date = structure(
            list(),
            class = c("collector_character", "collector")
          ),
          Gender = structure(
            list(),
            class = c("collector_character", "collector")
          ),
          Age = structure(
            list(),
            class = c("collector_integer", "collector")
          ),
          Col1 = structure(
            list(),
            class = c("collector_double", "collector")
          ),
          Col2 = structure(
            list(),
            class = c("collector_character", "collector")
          ),
          Col3 = structure(
            list(),
            class = c("collector_character", "collector")
          ),
          Col4 = structure(
            list(),
            class = c("collector_character", "collector")
          ),
          Col5 = structure(
            list(),
            class = c("collector_character", "collector")
          ),
          Col6 = structure(
            list(),
            class = c("collector_character", "collector")
          ),
          Col7 = structure(
            list(),
            class = c("collector_character", "collector")
          ),
          Col8 = structure(
            list(),
            class = c("collector_double", "collector")
          )
        ),
        .Names = c(
          "ID", "Date", "Gender", "Age",
          "Col1", "Col2", "Col3", "Col4", "Col5", "Col6", "Col7", "Col8"
        )
      ),
      default = structure(
        list(),
        class = c("collector_guess", "collector")
      )
    ),
    .Names = c("cols", "default"),
    class = "col_spec"
  )
)
# Lookup table mapping each code (character) to its short description,
# written out as a dput()-style literal.
#
# Fix: the last code was "63.9", which matches nothing in df1; the lookup
# table shown in the question lists "663.90 Space" and df1 stores that code
# as "663.9", so the entry is corrected to "663.9".
#
# NOTE(review): three descriptions carry stray tab characters
# ("\tcategorizing", "\tsubstantiating", "baton\t"). They are kept as-is
# here; confirm whether they should be trimmed (e.g. with trimws()).
lookup_table <- structure(
  list(
    Code = c(
      "345.5", "884.2", "8.69", "202.18", "189.8",
      "435.2", "84.7", "757", "V58.49", "V88.0",
      "324", "V65.44", "411.8", "E869.3", "41.04",
      "E936.3", "277.82", "E816.7", "663.9"
    ),
    Short_Description = c(
      "interStellar", "indispensable", "hallucination", "flow",
      "\tcategorizing", "choppiness", "chieftain", "\tsubstantiating",
      "unbridled", "polish", "stumble", "hoopster", "overtrimmed",
      "overbrutalizing", "choric", "busera", "subdelegating",
      "baton\t", "Space"
    )
  ),
  class = c("tbl_df", "tbl", "data.frame"),
  row.names = c(NA, -19L),
  .Names = c("Code", "Short_Description"),
  # Column specification: both columns are character collectors.
  spec = structure(
    list(
      cols = structure(
        list(
          Code = structure(
            list(),
            class = c("collector_character", "collector")
          ),
          Short_Description = structure(
            list(),
            class = c("collector_character", "collector")
          )
        ),
        .Names = c("Code", "Short_Description")
      ),
      default = structure(
        list(),
        class = c("collector_guess", "collector")
      )
    ),
    .Names = c("cols", "default"),
    class = "col_spec"
  )
)
答案 0 :(得分:2)
我们可以通过 gather/spread 重塑数据来实现这一目标:先用 gather 将数据集从“宽”格式转换为“长”格式,再按 'Code' 列与 'lookup_table' 做 left_join;然后用 mutate 把 'Code' 中能匹配到的元素替换为对应的 'Short_Description'(即 'Short_Description' 不是缺失值的那些行);接着用 select 去掉 'Short_Description' 列,最后用 spread 转换回“宽”格式。
library(dplyr)
library(tidyr)
# Replace every code in the Col* columns of df1 with its Short_Description
# from lookup_table; codes without a lookup entry are left unchanged.
#
# Uses pivot_longer()/pivot_wider() instead of the superseded
# gather()/spread().
df1 %>%
  # Col1 and Col8 are double while Col2-Col7 are character; pivot_longer()
  # refuses to stack mixed types, so coerce all code columns to character.
  mutate(across(matches("Col"), as.character)) %>%
  pivot_longer(matches("Col"), names_to = "Var", values_to = "Code") %>%
  # Name the join key explicitly instead of relying on the common-column
  # guess (and its "Joining, by = ..." message).
  left_join(lookup_table, by = "Code") %>%
  # Take the description where one was found, otherwise keep the code.
  mutate(Code = coalesce(Short_Description, Code)) %>%
  select(-Short_Description) %>%
  pivot_wider(names_from = Var, values_from = Code)
对于大型数据集,另一个选项是使用 data.table 中的 set。先创建一个数字索引 'nm1',对应列名中包含子字符串 'Col' 的列。把 'data.frame' 转换为 'data.table'(setDT(df1)),在 .SDcols 中指定 'nm1' 后遍历这些列并将其转换为 character(因为预期的输出将包含来自 'Short_Description' 列的 character 字符串)。然后使用 for 循环,并用 set 更改 'i' 所指定的行以及相应列的值。
答案 1 :(得分:1)
您可以使用 qdap 包中的 lookup 函数,并通过循环对各列执行替换。