我使用R Tools for Visual Studio指南将R模型集成到存储过程中。语法如下:
ALTER PROCEDURE [dbo].[spRegressionPeak]
AS
BEGIN
EXEC sp_execute_external_script @language = N'R'
, @script = N'
# @InputDataSet: input data frame, result of SQL query execution
# @OutputDataSet: data frame to pass back to SQL
# Test code
#library(RODBC)
# channel <- odbcDriverConnect(dbConnection)
# InputDataSet <- sqlQuery(channel,iconv(paste(readLines(''~/visual studio 2017/prod360/regressionpeak.query.sql'', encoding = ''UTF-8'', warn = FALSE), collapse=''\n''), from = ''UTF-8'', to = ''ASCII'', sub = '''') )
# odbcClose(channel)
#'' Regression Peaks
#''
#'' Runs polynomial regressions on a data table with one model for each
#'' user ID - independent variable pair. Note that independent variables
#'' are identified as all columns matching the following pattern: the
#'' letter "c" followed by a one-or-more digit number. The dependent
#'' variable is identified by its name "dv". The user ID is identified by
#'' its name "id". Also note that the regressors are the means of the
#'' original observations, grouped by \code{code}.
#''
#'' @param x Table to run regressions on
#'' @param c_means Code means table
#'' @importFrom rlang .data
#''
#'' @return Summary table where each distinct \code{code} value is
#'' represented by one row with columns for the respective standard
#'' deviations of each independent variable.
#'' @export
regression_peak <- function(x, c_means) {
df <-
dplyr::select(x, .data$id, .data$code, .data$dv) %>%
dplyr::left_join(c_means, by = "code")
id <- unique(df$id)
iv <- names(df)[stringr::str_detect(names(df), "c\\d+")]
grid <- tidyr::crossing(id, iv)
peaks <- purrr::map2_df(grid$id, grid$iv, function(i_id, i_iv) {
x <-
dplyr::filter(df, .data$id == i_id) %>%
dplyr::select_at(dplyr::vars(.data$dv, i_iv)) %>%
dplyr::rename(iv = !!i_iv)
fit <- stats::lm(dv ~ iv + I(iv ^ 2), data = x)
coef_a <- stats::coef(fit)["iv"]
coef_b <- stats::coef(fit)["I(iv^2)"]
extr <-
tibble::tibble(
type = c(2, 1, 1),
iv = c(unname(-coef_a / (2 * coef_b)), min(x$iv), max(x$iv))
) %>%
dplyr::mutate(y = stats::predict(fit, newdata = ., type = "response"))
t_max <- extr$iv[extr$type == 2]
tibble::tibble(
id = i_id,
iv = i_iv,
max = dplyr::case_when(
min(x$iv) < t_max & t_max < max(x$iv) ~ extr$iv[which.max(extr$y)],
TRUE ~ extr$iv[which.max(extr$y[extr$type == 1])]
) # should be x value
)
})
tidyr::spread(peaks, .data$iv, .data$max) %>%
dplyr::select(.data$id, iv) %>%
dplyr::filter_at(dplyr::vars(dplyr::matches("c\\d+")),
dplyr::any_vars(!is.na(.)))
}
OutputDataSet <- InputDataSet
'
, @input_data_1 = N'-- Place SQL query retrieving data for the R stored procedure here
DECLARE @cols AS NVARCHAR(MAX),
@query AS NVARCHAR(MAX),
@StudyID int,
@sStudyID VARCHAR(50)
Select a.StudyId, a.RespID, p.ProductNumber, p.ProductSequence,
CONVERT(varchar(50),a.DateAdded,101) as StudyDate,
CONVERT(VARCHAR(15),CAST((a.DateAdded)AS TIME),100) as
StudyTime,DATENAME(dw,a.DateAdded) as [DayOfWeek],
p.A_Value as A,p.B_Value as B,p.C_Value as C,p.D_Value AS D,p.E_Value AS
E,p.F_Value AS F, q.QuestionNumber
from answers a
inner join Products p on a.ProductID = p.ProductID
inner join Questions q on a.QuestionID = q.QuestionID
where a.StudyID = @sStudyID'
--- Edit this line to handle the output data frame.
WITH RESULT SETS (([StudyID] int, [RespID] int, [ProductNumber] int,
[ProductSequence] int, [StudyDate] date, [StudyTime] time, [DayOfWeek]
VARCHAR(10),[QuestionNumber] int, [A] int, [B] int, [C] int, [D] int, [E]
int, [F] INT));
END;
当我在SQL Server Management Studio中执行存储过程时,返回值= 0并且不输出任何数据。我不确定变量是否被正确声明,因为我在执行存储过程时没有提示它们。
如何修改存储过程以返回预期数据?我可以通过提供适当的学习ID来从ASP.NET调用它吗?
答案 0 :(得分:2)
您的代码很难阅读,但对我而言,您似乎并没有声明要获取的参数。以下是如何执行此操作的示例:
DECLARE @out_val float;
exec sp_execute_external_script
@language = N'R',
@script = N'
iris_dataset <- iris
setosa <- iris[iris$Species == "setosa",]
menSepWidth <- mean(setosa$Sepal.Width)
iris_dataset$Sepal.Length <- iris_dataset$Sepal.Length * multiplier
OutputDataSet <- data.frame(iris_dataset$Sepal.Length)
',
@params = N'@multiplier float, @menSepWidth float OUTPUT',
@multiplier = 5,
@menSepWidth = @out_val OUTPUT
WITH RESULT SETS ((SepalLength float));
SELECT @out_val AS MeanSepWidth
当您使用sp_execute_external_script
时,请查看this blog post我在哪里讨论如何处理参数等。
希望这有帮助!