我必须根据特定条件过滤数据框。 如果解决方案考虑使用dplyr,那就更好了。
我有这样的数据框结构
-- Failure Report Table
--
-- Rebuilds the view [dbo].[UV_filteredLogins] so that it exposes only the
-- finished login sessions (LogoutDate is not null) that match the requested
-- phase, press name and press type and whose operator is a member of role 1,
-- then returns the rows of that view as the procedure's result set.
--
-- Parameters:
--   @PressType  compared with PressTypes.Type_Description
--   @TestName   accepted for interface compatibility; not applied yet (see NOTE)
--   @PressName  compared with Presses.PressName
--   @Phase      compared with Phases.Phase_Name
--
-- The parameter values are embedded in the view text as literals, as the
-- procedure has always done. Each value has its single quotes doubled
-- first, so a quote inside a value can neither break the generated statement
-- nor inject SQL into it.
--
-- NOTE(review): the view still filters on [Test_ID] = TestName1 exactly as
-- before. TestName1 is not defined anywhere in this script and @TestName is
-- never used; presumably the intent was to compare a name column of [Tests]
-- with @TestName -- confirm the column and wire the parameter in.
CREATE PROCEDURE dbo.MTBFAlterView
    @PressType nvarchar(50),
    @TestName  nvarchar(50),
    @PressName nvarchar(50),
    @Phase     nvarchar(50)
AS
BEGIN
    DECLARE @ViewDROP nvarchar(MAX) = N'DROP VIEW [dbo].[UV_filteredLogins]';

    -- Quoted, escaped Unicode literals built from the parameter values.
    DECLARE @PhaseLiteral     nvarchar(128) = N'N''' + REPLACE(@Phase,     N'''', N'''''') + N'''';
    DECLARE @PressNameLiteral nvarchar(128) = N'N''' + REPLACE(@PressName, N'''', N'''''') + N'''';
    DECLARE @PressTypeLiteral nvarchar(128) = N'N''' + REPLACE(@PressType, N'''', N'''''') + N'''';

    DECLARE @STMT nvarchar(MAX) = N'
CREATE VIEW [dbo].[UV_filteredLogins]
AS
SELECT logins.[ID]
     , logins.[Test_ID]
     , phase.Phase_Name
     , press.PressName
     , pressType.Type_Description AS PressType
     , [Operator]
     , [LoginDate]
     , [LogoutDate]
     , DATEDIFF(MINUTE, LoginDate, LogoutDate) AS TimeDiff
FROM [TDM_Analysis].[dbo].[Logins] AS logins
JOIN [TDM_Analysis].[dbo].[Presses] AS press ON logins.Press_ID = press.ID
JOIN [TDM_Analysis].[dbo].[Phases] AS phase ON logins.Phase_ID = phase.ID
JOIN [TDM_Analysis].[dbo].[PressTypes] AS pressType ON pressType.ID = press.PressType_ID
JOIN [TDM_Analysis].[dbo].[Tests] AS test ON logins.Test_ID = test.ID
WHERE phase.Phase_Name = ' + @PhaseLiteral + N'
  AND press.PressName = ' + @PressNameLiteral + N'
  AND pressType.Type_Description = ' + @PressTypeLiteral + N'
  AND logins.[Test_ID] = TestName1
  AND LogoutDate IS NOT NULL
  AND Operator IN (
        SELECT au.Email
        FROM [UsersAuthorization].[dbo].[RolesMembers] AS rm
        JOIN [UsersAuthorization].[dbo].[ApplicationUsers] AS au
            ON rm.ApplicationUserID = au.ID
        WHERE rm.RoleID = 1
      )';

    -- A NULL argument turns the whole concatenation into NULL; stop before the
    -- existing view is dropped so it is not lost without a replacement.
    IF @STMT IS NULL
    BEGIN
        RAISERROR(N'MTBFAlterView: @PressType, @PressName and @Phase must not be NULL.', 16, 1);
        RETURN;
    END;

    -- CREATE VIEW fails when the view already exists, so drop the previous
    -- definition first; this keeps repeated calls working.
    IF OBJECT_ID(N'[dbo].[UV_filteredLogins]', N'V') IS NOT NULL
        EXEC sp_executesql @ViewDROP;

    EXEC sp_executesql @STMT; -- View is (re)created.

    -- Return the filtered rows; the view stays in place after the call.
    SELECT [ID]
         , [Test_ID]
         , Phase_Name
         , PressName
         , PressType
         , [Operator]
         , [LoginDate]
         , [LogoutDate]
         , TimeDiff
    FROM [dbo].[UV_filteredLogins];
END
GO
-- The batch separator above ends the procedure definition; without it every
-- statement that follows in the script would be compiled into the procedure body.
-- Example call: running the procedure creates the view and then SELECTs from it.
EXEC dbo.MTBFAlterView
    @PressType = 'HP Indigo 10000',
    @TestName  = 'Go Green',
    @PressName = 'MR-193',
    @Phase     = 'Test';
如果 label 列中包含特定值(例如 3),则不仅要保留该行,还要保留与该行具有相同
sentId 和 partner 值的所有行。预期结果如下:
sentId. B. label. partner. code
1. 2. 3. 4. 123
1. 2. 2. 4. 124
4. 2. 3. 8. 125
7. 3. 2. 7. 126
答案 0 :(得分:1)
按 sentId. 和 partner. 分组后,我们可以在 filter 中使用 %in% 来筛选行。
library(dplyr)
# Keep every (sentId., partner.) group that contains at least one label. equal to 3.
filter(group_by(df1, sentId., partner.), 3 %in% label.)
# A tibble: 3 x 5
# Groups: sentId. [2]
# sentId. B. label. partner. code
# <dbl> <dbl> <dbl> <dbl> <int>
#1 1 2 3 4 123
#2 1 2 2 4 124
#3 4 2 3 8 125
或者以紧凑的方式与data.table
library(data.table)
# setDT() converts df1 to a data.table by reference; the grouped .SD subset then
# keeps a whole (sentId., partner.) group only when one of its labels is 3.
setDT(df1)
df1[, .SD[3 %in% label.], by = .(sentId., partner.)]
或使用base R
# Flag each row whose (sentId., partner.) group has any label. equal to 3, then keep the flagged rows.
df1[ave(df1$label. == 3, df1$sentId., df1$partner., FUN = any), ]
# Sample data used by the snippets: four rows, column names carry a trailing dot
# (except `code`, which is an integer sequence).
df1 <- data.frame(
  sentId.  = c(1, 1, 4, 7),
  B.       = c(2, 2, 2, 3),
  label.   = c(3, 2, 3, 2),
  partner. = c(4, 4, 8, 7),
  code     = 123:126
)
答案 1 :(得分:1)
我们首先可以找到我们感兴趣的label
值所在的行索引,然后使用这些索引从整个数据帧中提取sentId
和partner
值的子集。
label_value <- 3
# Rows carrying the label of interest; %in% yields FALSE (not NA) for missing labels,
# so NA never leaks into the index.
inds <- df$label %in% label_value
# Compare the (sentId, partner) *pair* rather than each column on its own:
# testing the two columns independently would also keep a row whose sentId
# comes from one matching row and whose partner comes from a different one.
pair_key <- interaction(df$sentId, df$partner, drop = TRUE)
df[pair_key %in% pair_key[inds], ]
# sentId B label partner code
#1 1 2 3 4 123
#2 1 2 2 4 124
#3 4 2 3 8 125
dplyr
中的逻辑将是
library(dplyr)
# Keep the rows whose (sentId, partner) pair occurs on at least one row with the
# wanted label. The pair is matched as a unit; matching sentId and partner
# separately would also keep rows that mix values from different matching rows.
df %>%
  filter(interaction(sentId, partner, drop = TRUE) %in%
           interaction(sentId, partner, drop = TRUE)[label %in% label_value])
答案 2 :(得分:1)
可以很容易地使用SQL来解决此问题,因此一种选择是使用sqldf
库:
library(sqldf)
# your data frame df
# sqldf resolves table names to data frames, so the query must name `df` itself.
# The label column is numeric, hence the comparison with the number 3.
sql <- "SELECT t1.\"sentId.\", t1.\"B.\", t1.\"label.\", t1.\"partner.\", t1.code
        FROM df t1
        WHERE t1.\"label.\" = 3 OR
              EXISTS (SELECT 1 FROM df t2
                      WHERE t1.\"sentId.\" = t2.\"sentId.\" AND
                            t1.\"partner.\" = t2.\"partner.\" AND
                            t2.\"label.\" = 3)"
result <- sqldf(sql)
注意:上面的演示实际上使用的是 MariaDB,因为演示工具不支持 SQLite。但这仍然可以说明查询逻辑是正确的。
答案 3 :(得分:1)
使用sqldf
:
它通过子查询找出 label 为 3 的行所对应的 sentId 和 partner,并据此筛选出结果。
names(df) <- gsub("\\.", "", names(df)) # to remove . from column name
# Keep a row when some row with label 3 shares BOTH its sentId and its partner.
# Testing the two columns separately (sentId IN ... OR partner IN ...) would also
# return rows that match on only one of them.
sqldf("select t1.* from df as t1
       where exists (select 1 from df as t2
                     where t2.sentId = t1.sentId
                       and t2.partner = t1.partner
                       and t2.label = 3)")
输出:
sentId B label partner code
1 1 2 3 4 123
2 1 2 2 4 124
3 4 2 3 8 125