我使用 dplyr 和 paste0 对 T-SQL 查询返回的数据进行汇总,目的是输出 c1–c8 各列的均值和标准差。为此,我创建了以下存储过程:
-- Text form of @StudyID so that it can be appended to the query string below.
-- @StudyID itself is not declared in this excerpt (presumably a parameter of
-- the enclosing stored procedure -- confirm).
Declare @sStudy varchar(50)
Set @sStudy = Convert(Varchar(50),@StudyID)
-- Query text later supplied as @input_data_1: rows of ClosedStudyResponses
-- with VariableAttributeID = 1 for a single study, with the columns [1]..[8]
-- exposed under the aliases c1..c8.
-- NOTE(review): the study id is concatenated into the SQL text rather than
-- bound as a parameter; @StudyID is passed as int further down, so this looks
-- safe, but confirm its declared type.
Declare @inquery nvarchar(max) = N'Select
c.StudyID, c.RespID, c.ProductNumber, c.ProductSequence, c.BottomScaleValue,
c.BottomScaleAnchor, c.TopScaleValue, c.TopScaleAnchor, c.StudyDate,
c.DayOfWeek, c.A, c.B, c.C, c.D, c.E, c.F,
c.DependentVarYN, c.VariableAttributeID, c.VarAttributeName, c.[1] as c1,
c.[2] as c2, c.[3] as c3, c.[4] as c4, c.[5] as c5, c.[6] as c6, c.[7] as c7, c.[8] as c8
from ClosedStudyResponses c
--Sensory Value Attributes only for mean and standard deviation analytics.
where VariableAttributeID = 1
and c.StudyID =' +@sStudy ;
-- Opens a TRY block; the matching END TRY / BEGIN CATCH is not part of this
-- excerpt.
BEGIN TRY
--Insert into CodeMeans
-- Runs the R script below against the rows returned by @inquery.
-- NOTE(review): this call has no WITH RESULT SETS clause and no INSERT target
-- is visible in this excerpt.
exec sp_execute_external_script
@language = N'R',
-- NOTE(review): the script only defines codemeans() and never calls it. The
-- assignment to df happens inside the function body, so no script-level df
-- exists for @output_data_1_name to read back.
-- NOTE(review): summarise_all() is applied to every non-grouping column of
-- the input, while setNames() supplies only 18 names.
@script = N'
library(dplyr)
codemeans <- function(StudyID){
res <- InputDataSet %>%
group_by (StudyID, ProductNumber) %>%
summarise_all(.funs=c(mean, sd)) %>%
setNames(c("StudyID","ProductNumber",
paste0("c",1:8, "_mean"),
paste0("c",1:8, "_sd")))
df <- data.frame(res)
}
',
-- Query whose result set the script reads as InputDataSet.
@input_data_1 = @inquery,
-- Name of the R variable that is returned as the result set.
@output_data_1_name = N'df',
-- Declares @StudyID as an additional script parameter and passes its value.
@params = N'@StudyID int',
@StudyID = @StudyID
当我传入有效的 StudyID 执行该存储过程时,查询能够执行完毕,但没有返回任何数据,尽管源表的部分(甚至大多数)列中都有数据。
如何修改上述代码,才能确保返回数据?
答案 0 :(得分:0)
我认为您看不到结果的首要原因是:脚本只定义了一个函数(并且从未调用它),而没有把 data.frame 赋给输出变量 df。
我还认为 summarise_all 的写法应该有所不同(不是 .funs,而是 funs)。
最后,我不确定 setNames 是否真的起作用。我认为还必须在脚本外部(通过 WITH RESULT SETS)定义输出列。希望下面的写法可行:
-- Computes the per-(StudyID, ProductNumber) mean and standard deviation of the
-- score columns c1..c8 returned by @inquery and returns them as one result set.
-- @inquery and @StudyID must already be declared by the surrounding batch.
EXEC sp_execute_external_script
@language = N'R',
@script = N'
library(dplyr)
# Assign the result at script level: SQL Server reads back the variable named
# in @output_data_1_name, so it must exist outside of any function.
df <- InputDataSet %>%
  group_by(StudyID, ProductNumber) %>%
  # Summarise only c1..c8. The input query also returns ids, dates and text
  # anchors; summarising every column would yield far more than the 18 columns
  # named below and declared in WITH RESULT SETS.
  # na.rm = TRUE keeps NULL scores from turning a whole group into NA.
  # funs() is deprecated from dplyr 0.8.0; on newer dplyr use
  # list(mean = ~mean(., na.rm = TRUE), sd = ~sd(., na.rm = TRUE)) instead.
  summarise_at(vars(c1:c8),
               funs(mean = mean(., na.rm = TRUE), sd = sd(., na.rm = TRUE))) %>%
  # Drop the leftover grouping and hand back a plain data.frame.
  ungroup() %>%
  as.data.frame() %>%
  # Column order produced above: all means first, then all standard deviations.
  setNames(c("StudyID", "ProductNumber",
             paste0("c", 1:8, "_mean"),
             paste0("c", 1:8, "_sd")))
',
@input_data_1 = @inquery,
@output_data_1_name = N'df',
@params = N'@StudyID int',
@StudyID = @StudyID
-- Means and standard deviations are fractional, so they are declared as float;
-- declaring them as int would not match the double values produced by R.
WITH RESULT SETS(("StudyID" int, "ProductNumber" int,
"c1_mean" float, "c2_mean" float, "c3_mean" float, "c4_mean" float,
"c5_mean" float, "c6_mean" float, "c7_mean" float, "c8_mean" float,
"c1_sd" float, "c2_sd" float, "c3_sd" float, "c4_sd" float,
"c5_sd" float, "c6_sd" float, "c7_sd" float, "c8_sd" float
));