为避免反复等待几分钟才能编译c ++程序,我想检测R中的某些方程式并将其转换为整数序列,以由其他函数处理(该函数已在主要软件)。所讨论的方程式目前仅由一个未知数/参数,一些偏移量/缩放类型操作以及对数和指数的某种组合组成。一个小例子如下:
#user defined list of equations
eqs <- c('2*log(1+exp(0+3*d))-2', '2*d+1')
#replace this next line with code to detect / arrange equation list into integers
inteqs <- data.frame(type=c(1L,0L),outeroffset = c(-2L,1L),
inneroffset=c(0L,0L),outermultiply=c(2L,2L),innermultiply=c(3L,1L))
#transformation function within main program
tform <- function(d,type,outeroffset,inneroffset, outermultiply, innermultiply){
if(type==0) return(outeroffset + outermultiply * (inneroffset + innermultiply *d))
if(type==1) return(outeroffset + outermultiply * log(1+exp(inneroffset + innermultiply *d)))
}
for(d in c(-2,0,3.1)){ #arbitrary values of the unknown / parameter
print(2*log(1+exp(0+3*d))-2) #true value eq1
print(do.call(tform,c(d=d,inteqs[1,]))) #function returned value eq1
print(2*d+1) #true value eq2
print(do.call(tform,c(d=d,inteqs[2,])))#function returned value eq2
}
如示例中所述,我希望一些函数将上述形式的任意方程式转换为适当的整数序列。
答案 0 :(得分:1)
完全不同的方法:使用用户提供的公式生成数据点,然后将曲线拟合到这些点以恢复参数。另外,我们可以使用AIC(或您喜欢的任何一种度量方法)来识别用户最初提供的公式类型。
优点:对编写相同公式的各种方式都很有信心。相对容易地扩展到不同类型的公式。代码比“ tree-walking”替代方法更具可读性。
缺点:严重地,这似乎是过大的杀伤力。也可能会有点吵(即不能保证100%恢复正确的公式)。函数可能需要进行一些调整才能使用x值的不同范围,具体取决于我们正在测试的公式。您还必须手动指定哪些公式类型允许使用哪些参数。
# Function to figure out the parameters of a user-written formula.
library(plyr)
library(dplyr)
fit.eqs = function(e) {
# List the types of formulas we might encounter.
formula.types = data.frame(
type = 0:3,
formula = c(
"y ~ a * x + b",
"y ~ a * log(1 + exp(c * x + d)) + b",
"y ~ a * log(x + d) + b",
"y ~ a * (exp(x) / log(1 + exp(x + d))) + b"
),
outeroffset = 0,
inneroffset = c(NA, 0, 0, 0),
outermultiply = 1,
innermultiply = c(NA, 1, NA, NA),
aic = NA,
stringsAsFactors = F
)
# Get some x values over a wide range, and compute the corresponding y
# values.
xs = seq(0.01, 10, 0.01)
ys = eval(eval(substitute(substitute(e, list(d = xs)), list(e = as.quoted(e)[[1]]))))
data.to.fit = data.frame(x = xs, y = ys + rnorm(length(ys), 0, min(diff(ys)) / 100))
# Try to fit each formula to the data.
for(i in 1:nrow(formula.types)) {
start.params = list(a = 1, b = 0)
if(!is.na(formula.types$innermultiply[i])) {
start.params[["c"]] = 1
}
if(!is.na(formula.types$inneroffset[i])) {
start.params[["d"]] = 0
}
fit = nls(as.formula(formula.types$formula[i]),
data = data.to.fit,
start = start.params,
control = list(warnOnly = T))
formula.types$outeroffset[i] = round(coef(fit)[["b"]])
formula.types$outermultiply[i] = round(coef(fit)[["a"]])
if(!is.na(formula.types$innermultiply[i])) {
formula.types$innermultiply[i] = round(coef(fit)[["c"]])
}
if(!is.na(formula.types$inneroffset[i])) {
formula.types$inneroffset[i] = round(coef(fit)[["d"]])
}
formula.types$aic[i] = AIC(fit)
}
# Return the values we found.
return(formula.types %>%
filter(aic == min(aic)) %>%
mutate(inneroffset = coalesce(inneroffset, 0),
innermultiply = coalesce(innermultiply, 1)) %>%
select(type, outeroffset, inneroffset, outermultiply, innermultiply))
}
# Equations for testing.
eqs <- c('2*log(1+exp(0+3*d))-2',
'2*log(1+exp(3*d))-2',
'log(1+exp(3*d+0))*2-2',
'2*d+1',
'(2*d)+1',
'(1)+(2*d)',
'log(1+d)*2')
# Parse the equations and produce the correct integers.
inteqs.fitted = do.call(
"bind_rows",
lapply(eqs, fit.eqs)
)
答案 1 :(得分:0)
这是一种方法。这不是最有效或最可靠的方法,但是它可以处理示例中给出的方程,并且(据我测试)对操作数顺序和使用括号的差异具有鲁棒性。它不会不处理偏移量的非原子值(例如,d + -3
,而不是d - 3
)。
# This recursive function walks down the tree and extracts offsets/multipliers.
# The "e" argument contains the expression we want to parse. The "l" argument
# specifies the current level we're trying to parse (outer offset -> outer
# multiplier -> type -> inner offset -> inner multiplier). It returns a named
# list with the relevant values specified.
library(dplyr)
extract.integers = function(e, l) {
# If we're done, no need to do anything else.
if(l == "done") { return(list(type = 0)) }
# Initialize the list of values we're going to return.
values.to.return = list()
# Based on the current level, which level will we explore next?
next.level = "done"
if(l == "outeroffset") { next.level = "outermultiply" }
else if(l == "outermultiply") { next.level = "type" }
else if(l == "type") { next.level = "inneroffset" }
else if(l == "inneroffset") { next.level = "innermultiply" }
# If we're finding an offset, determine its value by adding to (or
# subtracting from) zero. If we're finding a multiplier, determine its value
# by multiplying by (or dividing into) one.
default.arg = 0
if(grepl("multiply", l)) {
default.arg = 1
}
# If the expression does not involve an operation, we've hit the bottom of
# the tree.
if(!is.call(e)) {
return(list())
}
# Otherwise, get the top-level operation.
else {
operation = e[[1]]
log.equation = F
}
# If this is a log or parentheses, go straight down to the next level (and
# set the equation type appropriately if this is a log).
if(is.element(as.character(operation), c("log", "("))) {
next.expression = e[[2]]
if(as.character(operation) == "log") {
log.equation = T
if(is.numeric(next.expression[[2]])) {
next.expression = next.expression[[3]]
} else {
next.expression = next.expression[[2]]
}
next.expression = next.expression[[2]]
}
else {
next.level = l
}
}
# Otherwise, figure out which argument has the actual value of the
# offset/multiplier and which has the next expression we're going to parse.
else {
arg1 = e[[2]]
arg2 = e[[3]]
arg.with.value = arg1
next.expression = arg2
if(is.numeric(arg2)) {
arg.with.value = arg2
next.expression = arg1
}
# If the operation matches the level we're trying to identify, proceed.
if((grepl("offset", l) & is.element(as.character(operation), c("+", "-"))) |
(grepl("multiply", l) & is.element(as.character(operation), c("*", "/")))) {
values.to.return[[l]] = eval(as.call(list(operation, default.arg, arg.with.value)))
}
# Otherwise, try the next level down.
else {
next.expression = e
}
}
# Recursive call to get values "lower down" in the expression. Fill in the
# value of the equation type, if known.
sub.values.to.return = extract.integers(next.expression, next.level)
if(log.equation) {
values.to.return[["type"]] = 1
}
# Collect all the values we know so far. Order is important, because we want
# to overwrite default/earlier values appropriately.
return(c(list(type = 0,
outeroffset = 0,
inneroffset = 0,
outermultiply = 1,
innermultiply = 1),
sub.values.to.return,
values.to.return))
}
使用提供的方程式进行测试,并进行一些改动:
# Test equations.
eqs <- c('2*log(1+exp(0+3*d))-2',
'2*log(1+exp(3*d))-2',
'log(1+exp(3*d+0))*2-2',
'2*d+1',
'(2*d)+1',
'(1)+(2*d)')
# Parse test equations.
inteqs = do.call(
"bind_rows",
lapply(
eqs,
function(x) {
extract.integers(parse(text = x)[[1]], "outeroffset")
}
)
) %>%
select(type, outeroffset, inneroffset, outermultiply, innermultiply)
# Check whether parses are correct.
for(d in c(-2, 0, 3.1)) { #arbitrary values of the unknown / parameter
print(2*log(1+exp(0+3*d))-2) #true value eq1
print(do.call(tform,c(d=d,inteqs[1,]))) #function returned value eq1
print(2*d+1) #true value eq2
print(do.call(tform,c(d=d,inteqs[4,])))#function returned value eq2
}
答案 2 :(得分:0)
由于ASK发布的曲线拟合方法不适用于不确定的系统(例如y〜a *(c * x + d)+ b),因此我更改了一些元素,完整的解决方案可能会有所帮助,因此请在此处发布。这不会返回指定的精确方程,而是一个可比较的方程。
library(plyr)
library(dplyr)
library(mize)
types=0:3
fit.eqs = function(e) {
# List the types of formulas we might encounter.
formula.types = data.frame(
type =types,
formula = c(
"y ~ a * x + b",
"y ~ a * log(1 + exp(c * x + d)) + b",
"y ~ a * exp(c * x + d) + b",
"y ~ a * (exp(c * x + d) / (1 + exp(c * x + d))) + b"
),
outeroffset = 0,
inneroffset = c(NA, rep(0,length(types)-1)),
outermultiply = 1,
innermultiply = c(NA, rep(1,length(types)-1)),
lsfit = NA,
stringsAsFactors = FALSE
)
# Get some x values over a wide range, and compute the corresponding y
# values.
x = c(seq(-2, 2, .1),seq(-10,10,.5),c(rnorm(10)))
y = eval(eval(substitute(substitute(e, list(param = x)), list(e = as.quoted(e)[[1]]))))
x <- x[abs(y) < 1e5];
y <- y[abs(y) < 1e5]
# Try to fit each formula to the data.
for(i in 1:nrow(formula.types)) {
start.params = list(a = 1.01, b = 0.01)
if(!is.na(formula.types$innermultiply[i])) {
start.params[["c"]] = 1.01
}
if(!is.na(formula.types$inneroffset[i])) {
start.params[["d"]] = 0.01
}
ff <- function(pars){
a=pars[1];b=pars[2];c=pars[3];d=pars[4]
yest<- eval(parse(text=gsub('y ~','',as.character(formula.types$formula[i]),fixed=TRUE)))
res <- (sum( ((y-yest)^2)/(abs(y)+.01)))
if(is.na(res)) res <- 1e100
return(res)
}
ffg <- function(pars){
g=try(numDeriv::grad(ff,pars,method='simple',
method.args=list(eps=1e-8,d=1e-10,r=2)
),silent=TRUE)
if(class(g)=='try-error') g <- rnorm(pars)
if(any(is.na(g))) g[is.na(g)] <- rnorm(sum(is.na(g)))
return(g)
}
fit = try(mize(par = unlist(start.params),
fg = list(fn=ff,gr=ffg),
max_iter=100,abs_tol=1e-3,rel_tol=1e-5,
method='BFGS'))
if(fit$f < .1 && fit$f > 1e-5) {
message('close, ', round(fit$f,3))
fit = try(mize(par = unlist(start.params), #if close, refine estimate
fg = list(fn=ff,gr=ffg),
max_iter=500,abs_tol=1e-5,rel_tol=1e-6,
method='BFGS'))
}
if(class(fit)=='try-error') browser()
formula.types$outeroffset[i] = fit$par[2] #round(coef(fit)[["b"]])
formula.types$outermultiply[i] = fit$par[1] #round(coef(fit)[["a"]])
if(!is.na(formula.types$innermultiply[i])) {
formula.types$innermultiply[i] = fit$par[3] #round(coef(fit)[["c"]])
}
if(!is.na(formula.types$inneroffset[i])) {
formula.types$inneroffset[i] = fit$par[4] #round(coef(fit)[["d"]])
}
formula.types$lsfit[i] = fit$f #AIC(fit)
}
# Return the values we found.
print(formula.types)
return(formula.types %>%
filter(lsfit == min(lsfit)) %>%
mutate(inneroffset = coalesce(inneroffset, 0),
innermultiply = coalesce(innermultiply, 1)) %>%
select(type, outeroffset, inneroffset, outermultiply, innermultiply,lsfit))
}
# Equations for testing.
eqs <- c('2*log(1+exp(0+3*param))-2',
'2*log(1+(exp(3*param)))-2',
'1.4 * (exp(3 * param + 8) / (1 + exp(3 * param + 8))) + .4',
'2*param+1',
'(2*param)+1',
'(1)+(2*param)',
'exp(1+param*1)*3',
'param^3',
'sqrt(exp((param^2)))'
)
# Parse the equations and produce the correct integers.
inteqs.fitted = do.call(
"bind_rows",
lapply(eqs, fit.eqs)
)
round(inteqs.fitted,3)