Question

我正在学习 R……我有一个数据框，想以编程方式用其中的某些列构造矩阵，但在语法上折腾了很久。

这是输入数据

XV= 0.5 0.5 1 1.5 3.5 5.5 7 9 NA NA NA NA NA NA NA NA 

YV= 5 10 25 15 15 25 25 45 NA NA NA NA NA NA NA NA 

type= 1BP2 2B 1BP2 2BP 1BP2 1BP2 1BP2 1BP2 NA NA NA NA NA NA NA NA

这是df dataframe

我事先不知道有多少行，但 ZL 项的数量与 XL 变量的数量相同，而且 ZL 变量的列数与 YL 变量的行数相同。这一切都是为了一个简单的二维插值算法，我正在尝试为它构建查找矩阵。

代码像这样

    z <- curvsx(XV,YV,type,lookupfile)

in curvsx...

    curvsx <- function(xi,yi,type,dfname) {

    df<-read.csv(dfname)

    xl<-eval(parse(text=(paste("na.omit(df$XL",TYPE,")",sep=""))))

    Nxl<-length(na.omit(xl))

    yl<-eval(parse(text=(paste("na.omit(df$YL",TYPE,")",sep=""))))

    Nyl<-length(na.omit(yl))

对于 TYPE = "1BP2"，上面的代码产生

    TYPE
     "1BP2"

    Nxl
     7

    Nyl
     3

    xl
     1 2 3 4 5 6 7

    attr(,"na.action")


    attr(,"class")
     "omit"

    yl
    10 20 30

    attr(,"na.action")
    4 5 6 7 8

    attr(,"class")
    "omit"   

    #this works but is clunky... (print statements to aid debug)
    # Build the lookup matrix zo one column at a time: iteration i fetches
    # the NA-stripped column df$ZL<i><TYPE> and appends it as column i.
    for (i in 1:Nyl) {

     # Assemble the text "na.omit(df$ZL<i><TYPE>)", then parse and evaluate it.
     zl<-eval(parse(text=(paste("na.omit(df$ZL",i,TYPE,")",sep=""))))

     # First pass creates the Nxl-row matrix; later passes rebuild it from
     # the values gathered so far plus the new column (filled column-wise).
     if(i==1) {zo<-matrix(zl,nrow=Nxl,byrow=FALSE)}

     else {zo<-matrix(c(zo,zl),nrow=Nxl,byrow=FALSE)}

     print(zo)

    }
    # produces...

         [,1] [,2] [,3]
    [1,]  100  200  300
    [2,]  200  300  400
    [3,]  300  600 1000
    [4,]  400  900 1600
    [5,]  500 1200 2200
    [6,]  600 1500 2800
    [7,]  700 1800 3400

下面是我试图用一行代码更优雅地完成同样的事情，但它不起作用……

    zo=matrix(c(eval(parse(text=(paste("na.omit(df$ZL",i=1:Nyl,TYPE,")",sep=""))))),ncol=Nyl,byrow=FALSE)
    # results in...

    > zo 
         [,1] [,2] [,3]
    [1,]  300 1600 3400
    [2,]  400 2200  300
    [3,] 1000 2800  400

当我尝试把这行代码拆成几段逐步查看来调试时……

    paste("na.omit(df$ZL",i=1:Nyl,TYPE,")",sep="")

    # produces

   "na.omit(df$ZL11BP2)" "na.omit(df$ZL21BP2)" "na.omit(df$ZL31BP2)"

    parse(text=(paste("na.omit(df$ZL",i=1:Nyl,TYPE,")",sep="")))

    # produces

    expression(na.omit(df$ZL11BP2), na.omit(df$ZL21BP2), 
    na.omit(df$ZL31BP2))    

    # but things seem to go south on

    c(eval(parse(text=(paste("na.omit(df$ZL",i=1:Nyl,TYPE,")",sep="")))))

    #which produces

    300  400 1000 1600 2200 2800 3400 

    # what I was hoping for, I thought I was executing this

    c(na.omit(df$ZL11BP2), na.omit(df$ZL21BP2), na.omit(df$ZL31BP2))

    # which produces what I want...

    100  200  300  400  500  600  700  200  300  600  900 1200 1500 1800  300  400 1000 1600 2200 2800 3400

我遗漏了什么，或者哪里理解错了？

谢谢

Answer 1

一种方法是使用 tidyr 的 pivot_longer() 将数据转换为长格式，并将列名拆分为 Variable、MainType 和 SubType。

然后可以轻松过滤它们。

library(tidyr)
library(dplyr)
library(stringr)
# Reshape the wide lookup table into long form: one row per cell, with the
# original column name in `name` and the cell's value in `Value`.
data <- data %>%
  pivot_longer(everything(), values_to = "Value") %>%
  # Split each column name after its 2nd character, e.g. "ZL11BP2" becomes
  # Variable = "ZL" and Type = "11BP2".
  separate(name,into = c("Variable","Type"), sep = 2) %>%
  # MainType is the part of Type matching digit + "B" + optional "P" +
  # optional digits (e.g. "1B", "1BP2"); SubType is what is left of Type
  # after removing MainType, i.e. the ZL column index.
  mutate(MainType = str_extract(Type,"[0-9]BP?[0-9]*"),
         SubType = str_remove(Type,MainType),
         # XL/YL names carry no index, so their empty SubType becomes NA.
         SubType = if_else(SubType == "", NA_character_, SubType)) 
data
## A tibble: 88 x 5
#   Variable Type  Value MainType SubType
#   <chr>    <chr> <int> <chr>    <chr>  
# 1 XL       1B        1 1B       NA     
# 2 YL       1B       10 1B       NA     
# 3 ZL       11B     100 1B       1      
# 4 ZL       21B     200 1B       2      
# 5 ZL       31B     300 1B       3      
# 6 ZL       41B     400 1B       4      
# 7 XL       1BP2      1 1BP2     NA     
# 8 YL       1BP2     10 1BP2     NA     
# 9 ZL       11BP2   100 1BP2     1      
#10 ZL       21BP2   200 1BP2     2      
## … with 78 more rows

然后编写一个创建矩阵的函数相对容易：

library(purrr)
mytype <- "1BP2"
# Build one matrix per variable (XL, YL, ZL) for the type named in `mytype`.
# Rows with a missing Value are dropped first, then the long table is split
# by Variable and each piece is spread back out with one column per SubType.
data %>%
  dplyr::filter(!is.na(Value) & MainType == mytype)  %>%
  split(.,.$Variable) %>% 
  map(~.x %>%
        # `data` has no row-index column, so number the observations within
        # each SubType. This gives pivot_wider() a key that uniquely
        # identifies every output row (SubType itself cannot be an id column
        # because it already supplies the new column names).
        group_by(SubType) %>%
        mutate(Row = row_number()) %>%
        ungroup() %>%
        pivot_wider(id_cols = Row,
                    names_from = SubType,
                    values_from = Value) %>%
        # Drop the helper Row column, leaving only the value columns.
        {as.matrix(.[,-1])}
      )
$XL
     1
[1,] 1
[2,] 2
[3,] 3
[4,] 4
[5,] 5
[6,] 6
[7,] 7
$YL
      1
[1,] 10
[2,] 20
[3,] 30
$ZL
       1    2    3
[1,] 100  200  300
[2,] 200  300  400
[3,] 300  600 1000
[4,] 400  900 1600
[5,] 500 1200 2200
[6,] 600 1500 2800
[7,] 700 1800 3400

数据

data <- structure(list(XL1B = 1:8, YL1B = c(10L, 20L, 30L, 40L, NA, NA, 
NA, NA), ZL11B = c(100L, 200L, 300L, 400L, 500L, 600L, 700L, 
800L), ZL21B = c(200L, 300L, 600L, 900L, 1200L, 1500L, 1800L, 
2100L), ZL31B = c(300L, 400L, 1000L, 1600L, 2200L, 2800L, 3400L, 
4000L), ZL41B = c(400L, 500L, 1700L, 2900L, 4100L, 5300L, 6500L, 
7700L), XL1BP2 = c(1L, 2L, 3L, 4L, 5L, 6L, 7L, NA), YL1BP2 = c(10L, 
20L, 30L, NA, NA, NA, NA, NA), ZL11BP2 = c(100L, 200L, 300L, 
400L, 500L, 600L, 700L, NA), ZL21BP2 = c(200L, 300L, 600L, 900L, 
1200L, 1500L, 1800L, NA), ZL31BP2 = c(300L, 400L, 1000L, 1600L, 
2200L, 2800L, 3400L, NA)), class = "data.frame", row.names = c(NA, 
-8L))

如何将字符串作为R中的表达式求值

1 个答案: