Question

我正在尝试过滤我的数据框。基本上我的数据是这样的：

-- NOTE(review): this query reads t_tran_log / t_item_master and does not appear
-- related to the year/value data frame discussed below -- confirm it belongs here.
-- Sums tran_qty per (derived tran_date, item_number) in the inner query, then
-- attaches dept_name / class_name / subclass_name from t_item_master.
select t.*, i.dept_name, i.class_name, i.subclass_name
from (
    select
        -- start date + start time minus the literal '1900-01-01 05:00', cast to a date;
        -- presumably a 5-hour shift of the day boundary (SQL Server datetime arithmetic) -- TODO confirm
        cast(t.start_tran_date+t.start_tran_time  - '1900-01-01 05:00' as date) tran_date
        ,t.item_number
        ,sum(t.tran_qty) qty
    from t_tran_log t
    where 
        -- half-open window: @startdate inclusive, @enddate exclusive
        t.start_tran_date + t.start_tran_time >= @startdate
        and t.start_tran_date+t.start_tran_time < @enddate
        and t.tran_type = '750'  
        -- the LIKE pattern contains no wildcard characters
        and t.description like 'Customer Return' 
        -- exclude hu_id values that start with '1Z'
        and t.hu_id not like '1Z%'
    group by 
        -- must repeat the tran_date expression from the select list
        cast(t.start_tran_date+t.start_tran_time  - '1900-01-01 05:00' as date), 
        t.item_number
) t
-- left join: aggregated rows without a matching item_number in t_item_master are still returned
left join t_item_master i on t.item_number = i.item_number

我想去掉值为“-”的行，除非“-”是该年份中唯一的值。原始数据如下（期望的输出是：每个年份只保留非“-”的行，而只有“-”的 2012 年保留该行）：

year   value
2010    '-'
2010   '$64'
2011    '-'
2011   '$50'
2012    '-'
2013    '-'
2013   '$87'

任何想法将不胜感激！我敢肯定这是一个简单的解决方案，但是我很难弄清楚。

Answer 1

按 year 分组后，可以使用 filter。思路是：保留值不等于 '-' 的行；或者（|）当该值为 '-' 且它是该年份中唯一的值时（可通过 n_distinct() 或 n() == 1 判断），也保留该行。

library(dplyr)

# Within each year, keep every row whose value is not '-'. A '-' row is kept
# only when '-' is the sole distinct value of that year.
# Requiring `n_distinct(value) > 1` for the non-'-' rows (and `n() == 1` for
# the '-' rows) would silently discard a year that holds a single non-'-'
# value, as well as a year made up of several '-' rows.
df1 %>%
  group_by(year) %>%
  filter(value != '-' | n_distinct(value) == 1)
# A tibble: 4 x 2
# Groups:   year [4]
#   year value
#  <int> <chr>
#1  2010 $64  
#2  2011 $50  
#3  2012 -    
#4  2013 $87

或者，更紧凑的写法是保留 'value' 不等于 '-' 的行，或者（|）该组中所有 'value' 都是 '-'（即 '-' 的个数等于该组行数）的行：

# Per year: keep the non-'-' rows, or every row when the whole year is '-'.
df1 %>%
  group_by(year) %>%
  filter(value != '-' | all(value == '-'))

数据

# Sample data: seven (year, value) rows; value is kept as character.
df1 <- data.frame(
  year = c(2010L, 2010L, 2011L, 2011L, 2012L, 2013L, 2013L),
  value = c("-", "$64", "-", "$50", "-", "-", "$87"),
  stringsAsFactors = FALSE
)

Answer 2

使用 subset + ave 的 base R 方案

subset(
  df,
  # Keep every non-"-" row; keep a "-" row only when all values of its year
  # are "-". The comparison is done first and ave() runs on the resulting
  # logical vector, so TRUE/FALSE is never coerced into the `value` column's
  # own type and parsed back (which yields NA when `value` is a factor).
  value != "-" | ave(value == "-", year, FUN = all)
)

给出

  year value
2 2010   $64
4 2011   $50
5 2012     -
7 2013   $87

Answer 3

使用base R的分步解决方案将是：

# Count the distinct values observed in each year
dfun <- aggregate(value ~ year, data = df, FUN = function(v) length(unique(v)))
# Attach the per-year count to every row of the original data frame
df$Var <- dfun$value[match(df$year, dfun$year)]
# Flag (1) the "'-'" rows of years that also hold other values; 0 otherwise
df$Flag <- as.numeric(df$value == "'-'" & df$Var > 1)
# Keep the unflagged rows and drop the two helper columns
df2 <- subset(df, Flag == 0, select = -c(Var, Flag))

输出：

  year value
2 2010 '$64'
4 2011 '$50'
5 2012   '-'
7 2013 '$87'

使用的数据：

# Data (values carry literal single quotes; Var and Flag are the helper
# columns that the step-by-step code recomputes)
df <- data.frame(
  year = c(2010L, 2010L, 2011L, 2011L, 2012L, 2013L, 2013L),
  value = c("'-'", "'$64'", "'-'", "'$50'", "'-'", "'-'", "'$87'"),
  Var = c(2L, 2L, 2L, 2L, 1L, 2L, 2L),
  Flag = c(1, 0, 1, 0, 0, 1, 0),
  stringsAsFactors = FALSE
)

根据另一列的条件过滤数据框

3 个答案:

数据