我有一个数据表,如下所示:
ID time somevalues change
001 12:33 13 NA
002 12:34 27 speed: 34
003 12:35 45 width: 127
004 12:36 41 NA
005 12:37 44 height: 19.2
006 12:35 45 NA
007 12:36 49 speed: 35
008 12:37 44 speed: 27
009 12:38 45 NA
010 12:39 44 NA
011 12:40 44 height: 18, speed: 28
012 12:41 40 NA
013 12:42 44 height: 18.1
014 12:43 55 width: 128.1
015 12:44 41 NA
... ... ... ...
该表包含传感器的各种测量值。其中一些测量值(speed、height、width)只在发生变化时才会被记录,而且全部写在同一列(change)中。我需要的是把它们拆成各自的列,并用最近一次记录的值向下填充,得到如下所示的数据表:
ID time somevalues speed height width
001 12:33 13 34 19.1 128
002 12:34 27 34 19.1 128
003 12:35 45 34 19.1 127
004 12:36 41 34 19.1 127
005 12:37 44 34 19.2 127
006 12:35 45 34 19.2 127
007 12:36 49 35 19.2 127
008 12:37 44 27 19.2 127
009 12:38 45 27 19.2 127
010 12:39 44 27 19.2 127
011 12:40 44 28 18 127
012 12:41 40 28 18 127
013 12:42 44 28 18.1 127
014 12:43 55 28 18.1 128.1
015 12:44 41 28 18.1 128.1
... ... ... ... ... ...
我需要这种格式的数据才能对其进行分析和可视化。有没有办法在 R 中实现这一点,而不必写多个 if 语句?
答案 0 :(得分:0)
这对您有用吗?
library(dplyr)
# Create the example data. The spaces inside `change` had to be removed so that
# read.table() can split the columns on whitespace; this makes no difference
# to the extraction below.
data_temp <- read.table(text = "
ID time somevalues change
001 12:33 13 NA
002 12:34 27 speed:34
003 12:35 45 width:127
004 12:36 41 NA
005 12:37 44 height:19.2
006 12:35 45 NA
007 12:36 49 speed:35
008 12:37 44 speed:27
009 12:38 45 NA
010 12:39 44 NA
011 12:40 44 height:18,speed:28
012 12:41 40 speed:29,width:120.1
013 12:42 44 height:18.1,speed:30,with:50
014 12:43 55 width:128.1
015 12:44 41 NA"
, header = TRUE, stringsAsFactors = FALSE)

# Extract one measurement from the `change` strings and carry it forward.
#
# change: character vector of comma-separated "name:value" updates
#         (NA where the row has no update at all).
# name:   measurement to extract, e.g. "speed"; used verbatim inside a regular
#         expression, so it must not contain regex metacharacters.
#
# Returns a numeric vector as long as `change`. Rows before the first update
# of `name` are NA; every later row holds the most recently reported value.
fill_measurement <- function(change, name) {
  # The key must sit at the start of the string or right after a comma, so
  # that e.g. "airspeed:" is not mistaken for "speed:".
  pattern <- paste0("^(?:.*,)?\\s*", name, ":\\s*([^,]*).*$")
  has_update <- grepl(pattern, change, perl = TRUE) # FALSE for NA entries
  updates <- as.numeric(
    sub(pattern, "\\1", change[has_update], perl = TRUE)
  )
  # cumsum(has_update) is the number of updates seen so far, i.e. the index of
  # the latest update (0 before the first one). Prepending NA maps index 0 to
  # NA, and also covers the case where `name` never appears.
  c(NA_real_, updates)[cumsum(has_update) + 1L]
}

# Keep the plain columns (base subsetting, so this step does not need dplyr).
data_wanted <- data_temp[, c("ID", "time", "somevalues")]

# One forward-filled column per measurement, no loop over rows and no
# if statements.
for (measurement in c("speed", "height", "width")) {
  data_wanted[[measurement]] <- fill_measurement(data_temp$change, measurement)
}

data_wanted
cbind(data_wanted, data_temp$change)
# Note: row 13 contains "with:50", which is not a "width:" update, so width
# keeps its previous value (120.1) there.
#    ID  time somevalues speed height width             data_temp$change
# 1   1 12:33         13    NA     NA    NA                         <NA>
# 2   2 12:34         27    34     NA    NA                     speed:34
# 3   3 12:35         45    34     NA 127.0                    width:127
# 4   4 12:36         41    34     NA 127.0                         <NA>
# 5   5 12:37         44    34   19.2 127.0                  height:19.2
# 6   6 12:35         45    34   19.2 127.0                         <NA>
# 7   7 12:36         49    35   19.2 127.0                     speed:35
# 8   8 12:37         44    27   19.2 127.0                     speed:27
# 9   9 12:38         45    27   19.2 127.0                         <NA>
# 10 10 12:39         44    27   19.2 127.0                         <NA>
# 11 11 12:40         44    28   18.0 127.0           height:18,speed:28
# 12 12 12:41         40    29   18.0 120.1         speed:29,width:120.1
# 13 13 12:42         44    30   18.1 120.1 height:18.1,speed:30,with:50
# 14 14 12:43         55    30   18.1 128.1                  width:128.1
# 15 15 12:44         41    30   18.1 128.1                         <NA>