根据数据框中的属性查找和删除列

时间:2019-02-14 21:00:26

标签: r dataframe

下面的数据中,每一列都带有嵌入的属性。我想检查每一列,确定它是否具有属性 attr(, "SpotfireColumnMetaData")$DP.UniqueId

如果该列确实具有此属性,那么我想将它们保留在数据框中,否则我要删除不具有此特定属性的列。

除了使用 for 循环之外,还有其他更高效的方法吗?

数据

# Example data (dput output): a one-row data.frame with 13 columns. Every
# column carries a "SpotfireColumnMetaData" attribute (a named list); only the
# last column, `ate_data[, 15]`, has a DP.UniqueId entry in that list.
# The value is bound to `df1` because the answer snippets refer to the data
# by that name.
df1 <- structure(list(MIR.tst_temp = structure(NA_character_, SpotfireColumnMetaData = list(
    DP.TestNumber = "Index", DP.Type = "", DP.TestName = "MIR.tst_temp", 
    DP.Info = "", DP.TestUnit = "", DP.Statistic = "", DP.Program = "", 
    DP.ScaleFactor = 0L, DP.FilteredOutCells = 0L, Limits.Prod.Lower = -Inf, 
    Limits.Prod.Target = NaN, Limits.Prod.Upper = Inf, Limits.Spec.Lower = -Inf, 
    Limits.Spec.Target = NaN, Limits.Spec.Upper = Inf, Limits.Outlier.Lower = -Inf, 
    Limits.Outlier.Target = NaN, Limits.Outlier.Upper = Inf, 
    Limits.Whatif.Lower = -Inf, Limits.Whatif.Target = NaN, Limits.Whatif.Upper = Inf, 
    DP.ParamType = "PARAMETRIC", DP.BlockId = "", DP.Scratch = "", 
    DP.ColumnId = "", Dp.BaseName = "")), PRR.part_id = structure("1", SpotfireColumnMetaData = list(
    DP.TestNumber = "Index", DP.Type = "", DP.TestName = "PRR.part_id", 
    DP.Info = "", DP.TestUnit = "", DP.Statistic = "", DP.Program = "", 
    DP.ScaleFactor = 0L, DP.FilteredOutCells = 0L, Limits.Prod.Lower = -Inf, 
    Limits.Prod.Target = NaN, Limits.Prod.Upper = Inf, Limits.Spec.Lower = -Inf, 
    Limits.Spec.Target = NaN, Limits.Spec.Upper = Inf, Limits.Outlier.Lower = -Inf, 
    Limits.Outlier.Target = NaN, Limits.Outlier.Upper = Inf, 
    Limits.Whatif.Lower = -Inf, Limits.Whatif.Target = NaN, Limits.Whatif.Upper = Inf, 
    DP.ParamType = "PARAMETRIC", DP.BlockId = "", DP.Scratch = "", 
    DP.ColumnId = "", Dp.BaseName = "")), PRR.part_id.count = structure(0L, SpotfireColumnMetaData = list(
    DP.TestNumber = "Index", DP.Type = "", DP.TestName = "PRR.part_id.count", 
    DP.Info = "", DP.TestUnit = "", DP.Statistic = "", DP.Program = "", 
    DP.ScaleFactor = 0L, DP.FilteredOutCells = 0L, Limits.Prod.Lower = -Inf, 
    Limits.Prod.Target = NaN, Limits.Prod.Upper = Inf, Limits.Spec.Lower = -Inf, 
    Limits.Spec.Target = NaN, Limits.Spec.Upper = Inf, Limits.Outlier.Lower = -Inf, 
    Limits.Outlier.Target = NaN, Limits.Outlier.Upper = Inf, 
    Limits.Whatif.Lower = -Inf, Limits.Whatif.Target = NaN, Limits.Whatif.Upper = Inf, 
    DP.ParamType = "PARAMETRIC", DP.BlockId = "", DP.Scratch = "", 
    DP.ColumnId = "", Dp.BaseName = "")), PRR.hard_bin = structure(21L, SpotfireColumnMetaData = list(
    DP.TestNumber = "Index", DP.Type = "bin", DP.TestName = "PRR.hard_bin", 
    DP.Info = "", DP.TestUnit = "", DP.Statistic = "", DP.Program = "", 
    DP.ScaleFactor = 0L, DP.FilteredOutCells = 0L, Limits.Prod.Lower = -Inf, 
    Limits.Prod.Target = NaN, Limits.Prod.Upper = Inf, Limits.Spec.Lower = -Inf, 
    Limits.Spec.Target = NaN, Limits.Spec.Upper = Inf, Limits.Outlier.Lower = -Inf, 
    Limits.Outlier.Target = NaN, Limits.Outlier.Upper = Inf, 
    Limits.Whatif.Lower = -Inf, Limits.Whatif.Target = NaN, Limits.Whatif.Upper = Inf, 
    DP.ParamType = "PARAMETRIC", DP.BlockId = "", DP.Scratch = "", 
    DP.ColumnId = "", Dp.BaseName = "")), PRR.soft_bin = structure(2100L, SpotfireColumnMetaData = list(
    DP.TestNumber = "Index", DP.Type = "bin", DP.TestName = "PRR.soft_bin", 
    DP.Info = "", DP.TestUnit = "", DP.Statistic = "", DP.Program = "", 
    DP.ScaleFactor = 0L, DP.FilteredOutCells = 0L, Limits.Prod.Lower = -Inf, 
    Limits.Prod.Target = NaN, Limits.Prod.Upper = Inf, Limits.Spec.Lower = -Inf, 
    Limits.Spec.Target = NaN, Limits.Spec.Upper = Inf, Limits.Outlier.Lower = -Inf, 
    Limits.Outlier.Target = NaN, Limits.Outlier.Upper = Inf, 
    Limits.Whatif.Lower = -Inf, Limits.Whatif.Target = NaN, Limits.Whatif.Upper = Inf, 
    DP.ParamType = "PARAMETRIC", DP.BlockId = "", DP.Scratch = "", 
    DP.ColumnId = "", Dp.BaseName = "")), WIR.wafer_id = structure(NA_character_, SpotfireColumnMetaData = list(
    DP.TestNumber = "Index", DP.Type = "wafer", DP.TestName = "WIR.wafer_id", 
    DP.Info = "", DP.TestUnit = "", DP.Statistic = "", DP.Program = "", 
    DP.ScaleFactor = 0L, DP.FilteredOutCells = 0L, Limits.Prod.Lower = -Inf, 
    Limits.Prod.Target = NaN, Limits.Prod.Upper = Inf, Limits.Spec.Lower = -Inf, 
    Limits.Spec.Target = NaN, Limits.Spec.Upper = Inf, Limits.Outlier.Lower = -Inf, 
    Limits.Outlier.Target = NaN, Limits.Outlier.Upper = Inf, 
    Limits.Whatif.Lower = -Inf, Limits.Whatif.Target = NaN, Limits.Whatif.Upper = Inf, 
    DP.ParamType = "PARAMETRIC", DP.BlockId = "", DP.Scratch = "", 
    DP.ColumnId = "", Dp.BaseName = "")), MIR.lot_id = structure(NA_character_, SpotfireColumnMetaData = list(
    DP.TestNumber = "Index", DP.Type = "lot", DP.TestName = "MIR.lot_id", 
    DP.Info = "", DP.TestUnit = "", DP.Statistic = "", DP.Program = "", 
    DP.ScaleFactor = 0L, DP.FilteredOutCells = 0L, Limits.Prod.Lower = -Inf, 
    Limits.Prod.Target = NaN, Limits.Prod.Upper = Inf, Limits.Spec.Lower = -Inf, 
    Limits.Spec.Target = NaN, Limits.Spec.Upper = Inf, Limits.Outlier.Lower = -Inf, 
    Limits.Outlier.Target = NaN, Limits.Outlier.Upper = Inf, 
    Limits.Whatif.Lower = -Inf, Limits.Whatif.Target = NaN, Limits.Whatif.Upper = Inf, 
    DP.ParamType = "PARAMETRIC", DP.BlockId = "", DP.Scratch = "", 
    DP.ColumnId = "", Dp.BaseName = "")), PRR.x_coord = structure(NA_integer_, SpotfireColumnMetaData = list(
    DP.TestNumber = "Index", DP.Type = "diex", DP.TestName = "PRR.x_coord", 
    DP.Info = "", DP.TestUnit = "", DP.Statistic = "", DP.Program = "", 
    DP.ScaleFactor = 0L, DP.FilteredOutCells = 0L, Limits.Prod.Lower = -Inf, 
    Limits.Prod.Target = NaN, Limits.Prod.Upper = Inf, Limits.Spec.Lower = -Inf, 
    Limits.Spec.Target = NaN, Limits.Spec.Upper = Inf, Limits.Outlier.Lower = -Inf, 
    Limits.Outlier.Target = NaN, Limits.Outlier.Upper = Inf, 
    Limits.Whatif.Lower = -Inf, Limits.Whatif.Target = NaN, Limits.Whatif.Upper = Inf, 
    DP.ParamType = "PARAMETRIC", DP.BlockId = "", DP.Scratch = "", 
    DP.ColumnId = "", Dp.BaseName = "")), PRR.y_coord = structure(NA_integer_, SpotfireColumnMetaData = list(
    DP.TestNumber = "Index", DP.Type = "diey", DP.TestName = "PRR.y_coord", 
    DP.Info = "", DP.TestUnit = "", DP.Statistic = "", DP.Program = "", 
    DP.ScaleFactor = 0L, DP.FilteredOutCells = 0L, Limits.Prod.Lower = -Inf, 
    Limits.Prod.Target = NaN, Limits.Prod.Upper = Inf, Limits.Spec.Lower = -Inf, 
    Limits.Spec.Target = NaN, Limits.Spec.Upper = Inf, Limits.Outlier.Lower = -Inf, 
    Limits.Outlier.Target = NaN, Limits.Outlier.Upper = Inf, 
    Limits.Whatif.Lower = -Inf, Limits.Whatif.Target = NaN, Limits.Whatif.Upper = Inf, 
    DP.ParamType = "PARAMETRIC", DP.BlockId = "", DP.Scratch = "", 
    DP.ColumnId = "", Dp.BaseName = "")), PRR.site_num = structure(0L, SpotfireColumnMetaData = list(
    DP.TestNumber = "Index", DP.Type = "site", DP.TestName = "PRR.site_num", 
    DP.Info = "", DP.TestUnit = "", DP.Statistic = "", DP.Program = "", 
    DP.ScaleFactor = 0L, DP.FilteredOutCells = 0L, Limits.Prod.Lower = -Inf, 
    Limits.Prod.Target = NaN, Limits.Prod.Upper = Inf, Limits.Spec.Lower = -Inf, 
    Limits.Spec.Target = NaN, Limits.Spec.Upper = Inf, Limits.Outlier.Lower = -Inf, 
    Limits.Outlier.Target = NaN, Limits.Outlier.Upper = Inf, 
    Limits.Whatif.Lower = -Inf, Limits.Whatif.Target = NaN, Limits.Whatif.Upper = Inf, 
    DP.ParamType = "PARAMETRIC", DP.BlockId = "", DP.Scratch = "", 
    DP.ColumnId = "", Dp.BaseName = "")), WRR.site_grp = structure(NA_integer_, SpotfireColumnMetaData = list(
    DP.TestNumber = "Index", DP.Type = "", DP.TestName = "WRR.site_grp", 
    DP.Info = "", DP.TestUnit = "", DP.Statistic = "", DP.Program = "", 
    DP.ScaleFactor = 0L, DP.FilteredOutCells = 0L, Limits.Prod.Lower = -Inf, 
    Limits.Prod.Target = NaN, Limits.Prod.Upper = Inf, Limits.Spec.Lower = -Inf, 
    Limits.Spec.Target = NaN, Limits.Spec.Upper = Inf, Limits.Outlier.Lower = -Inf, 
    Limits.Outlier.Target = NaN, Limits.Outlier.Upper = Inf, 
    Limits.Whatif.Lower = -Inf, Limits.Whatif.Target = NaN, Limits.Whatif.Upper = Inf, 
    DP.ParamType = "PARAMETRIC", DP.BlockId = "", DP.Scratch = "", 
    DP.ColumnId = "", Dp.BaseName = "")), WRR.head_num = structure(NA_integer_, SpotfireColumnMetaData = list(
    DP.TestNumber = "Index", DP.Type = "", DP.TestName = "WRR.head_num", 
    DP.Info = "", DP.TestUnit = "", DP.Statistic = "", DP.Program = "", 
    DP.ScaleFactor = 0L, DP.FilteredOutCells = 0L, Limits.Prod.Lower = -Inf, 
    Limits.Prod.Target = NaN, Limits.Prod.Upper = Inf, Limits.Spec.Lower = -Inf, 
    Limits.Spec.Target = NaN, Limits.Spec.Upper = Inf, Limits.Outlier.Lower = -Inf, 
    Limits.Outlier.Target = NaN, Limits.Outlier.Upper = Inf, 
    Limits.Whatif.Lower = -Inf, Limits.Whatif.Target = NaN, Limits.Whatif.Upper = Inf, 
    DP.ParamType = "PARAMETRIC", DP.BlockId = "", DP.Scratch = "", 
    DP.ColumnId = "", Dp.BaseName = "")), `ate_data[, 15]` = structure(-3.862381e-08, SpotfireColumnMetaData = list(
    DP.TestNumber = "13001", DP.Type = "", DP.TestName = "gross_idd_dcvs vdd3v0 14.a302", 
    DP.Info = "PTR.result", DP.TestUnit = "A", DP.Statistic = "raw", 
    DP.Program = "", DP.ScaleFactor = 0L, DP.FilteredOutCells = 0L, 
    Limits.Prod.Lower = -1.04e-07, Limits.Prod.Target = NaN, 
    Limits.Prod.Upper = 1.58e-06, Limits.Spec.Lower = -Inf, Limits.Spec.Target = NaN, 
    Limits.Spec.Upper = Inf, Limits.Outlier.Lower = -Inf, Limits.Outlier.Target = NaN, 
    Limits.Outlier.Upper = Inf, Limits.Whatif.Lower = -Inf, Limits.Whatif.Target = NaN, 
    Limits.Whatif.Upper = Inf, DP.ParamType = "PARAMETRIC", DP.BlockId = "", 
    DP.Scratch = "", DP.ColumnId = "", Dp.BaseName = "", DP.FTR.testtxt = "", 
    DP.PTR.testtxt = "gross_idd_dcvs VDD3V0 14.a302", DP.DTR.textdat = "", 
    DP.MPR.pinnum = "0", DP.UniqueId = "Start"))), class = "data.frame", row.names = c(NA, 
-1L))

1 个答案:

答案 0 :(得分:1)

一种选择是使用 sapply(或返回类型更稳定的 vapply)遍历各列,创建一个逻辑索引,再用它对列取子集

# Build one TRUE/FALSE flag per column: TRUE when the column's
# "SpotfireColumnMetaData" attribute holds a non-empty DP.UniqueId entry.
# vapply() always yields a logical vector, even for a zero-column data frame
# (sapply() returns an empty list there, which `[` cannot use as an index).
# exact = TRUE and `[[` look the names up exactly, so a similarly named
# attribute or list entry is never picked up through partial matching.
i1 <- vapply(df1, function(x) {
  meta <- attr(x, "SpotfireColumnMetaData", exact = TRUE)
  length(meta[["DP.UniqueId"]]) > 0
}, logical(1))
# Keep only the flagged columns; the result is still a data frame.
df1[i1]

或者将 Filter 与 is.null 一起使用

# Keep the columns for which the predicate is TRUE, i.e. those whose
# "SpotfireColumnMetaData" attribute has a DP.UniqueId entry.
Filter(
  function(col) {
    meta <- attr(col, "SpotfireColumnMetaData")
    !is.null(meta$DP.UniqueId)
  },
  df1
)

tidyverse 中的做法是使用 purrr 的 keep

library(dplyr)
library(purrr)
# Keep only the columns whose "SpotfireColumnMetaData" attribute has a
# DP.UniqueId entry (i.e. the lookup is not NULL).
keep(df1, function(col) {
  unique_id <- attr(col, "SpotfireColumnMetaData")$DP.UniqueId
  !is.null(unique_id)
})

或者使用 discard,丢弃那些没有该属性的列

# Drop every column whose "SpotfireColumnMetaData" attribute has no
# DP.UniqueId entry (the lookup returns NULL); the remaining columns are kept.
discard(df1, function(col) {
  is.null(attr(col, "SpotfireColumnMetaData")$DP.UniqueId)
})