我正在学习 R。我有一个数据框,想以编程方式用其中的某些列构造矩阵,但在语法上卡了很久。
这是输入数据
XV= 0.5 0.5 1 1.5 3.5 5.5 7 9 NA NA NA NA NA NA NA NA
YV= 5 10 25 15 15 25 25 45 NA NA NA NA NA NA NA NA
type= 1BP2 2B 1BP2 2BP 1BP2 1BP2 1BP2 1BP2 NA NA NA NA NA NA NA NA
这是df
我事先不知道有多少行,但每个 ZL 列的行数与对应 XL 变量的行数相同,并且 ZL 列的数量与 YL 变量的行数相同。这些数据最终用于一个简单的二维插值算法,我现在要做的是构建其中的查找矩阵。
代码像这样
z <- curvsx(XV,YV,type,lookupfile)
in curvsx...
# Reads the lookup table from the CSV file `dfname` and extracts the X and Y
# vectors for one curve type, with NA values removed.
# NOTE(review): the parameter is named `type`, but the body reads `TYPE`; R is
# case-sensitive, so `TYPE` must be defined somewhere not shown here -- confirm.
# NOTE(review): `xi` and `yi` are not used in the lines shown, and the
# function's closing brace is not part of this excerpt.
curvsx <- function(xi,yi,type,dfname) {
df<-read.csv(dfname)
# Build the text "na.omit(df$XL<TYPE>)" and evaluate it, i.e. take the column
# named XL<TYPE> with its NA values dropped.
xl<-eval(parse(text=(paste("na.omit(df$XL",TYPE,")",sep=""))))
# xl has already been through na.omit(), so the second call does not change
# the length being counted.
Nxl<-length(na.omit(xl))
# Same steps for the column named YL<TYPE>.
yl<-eval(parse(text=(paste("na.omit(df$YL",TYPE,")",sep=""))))
Nyl<-length(na.omit(yl))
当 TYPE = "1BP2" 时,上面的代码输出如下:
TYPE
"1BP2"
Nxl
7
Nyl
3
xl
1 2 3 4 5 6 7
attr(,"na.action")
attr(,"class")
"omit"
yl
10 20 30
attr(,"na.action")
4 5 6 7 8
attr(,"class")
"omit"
#this works but is clunky... (print statements to aid debug)
# Builds zo one column per pass: pass i evaluates "na.omit(df$ZL<i><TYPE>)"
# and appends the result to the values collected so far.
# NOTE(review): 1:Nyl yields c(1, 0) when Nyl is 0; seq_len(Nyl) would skip
# the loop in that case.
for (i in 1:Nyl) {
zl<-eval(parse(text=(paste("na.omit(df$ZL",i,TYPE,")",sep=""))))
# First pass: zl becomes the first column of a matrix with Nxl rows.
if(i==1) {zo<-matrix(zl,nrow=Nxl,byrow=FALSE)}
# Later passes: flatten zo, append zl, and refill column-wise with Nxl rows.
# (`else` on its own line parses here only because it sits inside the loop's
# braces.)
else {zo<-matrix(c(zo,zl),nrow=Nxl,byrow=FALSE)}
print(zo)
}
# produces...
[,1] [,2] [,3]
[1,] 100 200 300
[2,] 200 300 400
[3,] 300 600 1000
[4,] 400 900 1600
[5,] 500 1200 2200
[6,] 600 1500 2800
[7,] 700 1800 3400
下面是我想用一行代码更优雅地完成同样事情的尝试,但它不起作用...
# NOTE(review): this does not build the intended matrix. paste() is vectorised
# over 1:Nyl, so parse() returns an expression vector with Nyl elements, and
# eval() on an expression vector returns only the value of its last element.
# c() therefore receives just the last ZL column, and matrix() recycles those
# values to fill Nyl columns.
zo=matrix(c(eval(parse(text=(paste("na.omit(df$ZL",i=1:Nyl,TYPE,")",sep=""))))),ncol=Nyl,byrow=FALSE)
# results in...
> zo
[,1] [,2] [,3]
[1,] 300 1600 3400
[2,] 400 2200 300
[3,] 1000 2800 400
当我尝试把这行代码拆开、逐段查看来调试时...
paste("na.omit(df$ZL",i=1:Nyl,TYPE,")",sep="")
# produces
"na.omit(df$ZL11BP2)" "na.omit(df$ZL21BP2)" "na.omit(df$ZL31BP2)"
parse(text=(paste("na.omit(df$ZL",i=1:Nyl,TYPE,")",sep="")))
# produces
expression(na.omit(df$ZL11BP2), na.omit(df$ZL21BP2),
na.omit(df$ZL31BP2))
# but things seem to go south on
# eval() evaluates each element of the expression vector in turn but returns
# only the last result, so c() sees a single column rather than all of them.
c(eval(parse(text=(paste("na.omit(df$ZL",i=1:Nyl,TYPE,")",sep="")))))
#which produces
300 400 1000 1600 2200 2800 3400
# what I was hoping for, I thought I was executing this
c(na.omit(df$ZL11BP2), na.omit(df$ZL21BP2), na.omit(df$ZL31BP2))
# which produces what I want...
100 200 300 400 500 600 700 200 300 600 900 1200 1500 1800 300 400 1000 1600 2200 2800 3400
我遗漏了什么,或者哪里理解错了?
谢谢
答案 0 :(得分:0)
一种方法是使用 tidyr 的 pivot_longer 将数据转换为长格式,并将列名拆分为 Variable、MainType 和 SubType,然后就可以轻松地对它们进行过滤。
library(tidyr)
library(dplyr)
library(stringr)
# Reshape the wide lookup table into long form and decode each column name.
# The first two characters of a name become Variable; the remainder (Type)
# is split into the curve type and whatever precedes it.
data <- data %>%
  pivot_longer(cols = everything(), values_to = "Value") %>%
  separate(col = name, into = c("Variable", "Type"), sep = 2) %>%
  mutate(
    # Curve type: a digit, "B", an optional "P" and any trailing digits.
    MainType = str_extract(string = Type, pattern = "[0-9]BP?[0-9]*"),
    # What is left of Type once the curve type is removed; an empty
    # remainder is recorded as NA.
    SubType = na_if(str_remove(string = Type, pattern = MainType), "")
  )
data
## A tibble: 88 x 5
# Variable Type Value MainType SubType
# <chr> <chr> <int> <chr> <chr>
# 1 XL 1B 1 1B NA
# 2 YL 1B 10 1B NA
# 3 ZL 11B 100 1B 1
# 4 ZL 21B 200 1B 2
# 5 ZL 31B 300 1B 3
# 6 ZL 41B 400 1B 4
# 7 XL 1BP2 1 1BP2 NA
# 8 YL 1BP2 10 1BP2 NA
# 9 ZL 11BP2 100 1BP2 1
#10 ZL 21BP2 200 1BP2 2
## … with 78 more rows
然后编写一个创建矩阵的函数相对容易:
library(purrr)
# Curve type to extract.
mytype <- "1BP2"
# Keep the non-missing rows of the chosen type, split them by Variable, and
# widen each piece so that every SubType becomes its own column.
# NOTE(review): id_cols names a `Row` column, but the tibble printed for the
# long-format data has only Variable, Type, Value, MainType and SubType --
# confirm where `Row` is created before relying on this snippet.
data %>%
dplyr::filter(!is.na(Value) & MainType == mytype) %>%
split(.,.$Variable) %>%
map(~.x %>%
pivot_wider(id_cols = c("Row","SubType"),
names_from = SubType,
values_from = Value) %>%
# Drop the first column of the widened table and convert the rest to a matrix.
{as.matrix(.[,-1])}
)
$XL
1
[1,] 1
[2,] 2
[3,] 3
[4,] 4
[5,] 5
[6,] 6
[7,] 7
$YL
1
[1,] 10
[2,] 20
[3,] 30
$ZL
1 2 3
[1,] 100 200 300
[2,] 200 300 400
[3,] 300 600 1000
[4,] 400 900 1600
[5,] 500 1200 2200
[6,] 600 1500 2800
[7,] 700 1800 3400
数据
# Example input table with 8 rows and 11 integer columns. Column names follow
# the pattern <XL|YL|ZL><optional index><type> for the types "1B" and "1BP2";
# columns with fewer than 8 values are filled up with NA.
data <- data.frame(
  XL1B    = 1:8,
  YL1B    = c(10L, 20L, 30L, 40L, NA, NA, NA, NA),
  ZL11B   = c(100L, 200L, 300L, 400L, 500L, 600L, 700L, 800L),
  ZL21B   = c(200L, 300L, 600L, 900L, 1200L, 1500L, 1800L, 2100L),
  ZL31B   = c(300L, 400L, 1000L, 1600L, 2200L, 2800L, 3400L, 4000L),
  ZL41B   = c(400L, 500L, 1700L, 2900L, 4100L, 5300L, 6500L, 7700L),
  XL1BP2  = c(1L, 2L, 3L, 4L, 5L, 6L, 7L, NA),
  YL1BP2  = c(10L, 20L, 30L, NA, NA, NA, NA, NA),
  ZL11BP2 = c(100L, 200L, 300L, 400L, 500L, 600L, 700L, NA),
  ZL21BP2 = c(200L, 300L, 600L, 900L, 1200L, 1500L, 1800L, NA),
  ZL31BP2 = c(300L, 400L, 1000L, 1600L, 2200L, 2800L, 3400L, NA)
)