Question

我有一个像这样的字符串（变量和常量的数量并不重要）：

> my_string <- "-x+2y+z=-1; x-3y-2z=-1; 3x-y-z=4"

我知道如何使用替换和数字函数获取cbind(A, b)矩阵......

#    [,1]   [,2]   [,3]    [,4]
# [1,] -1     2      1      -1
# [2,]  1    -3     -2      -1
# [3,]  3    -1     -1       4

...但不知道如何自动获取两个矩阵A和b

A
#    [,1]   [,2]   [,3]
# [1,]-1     2      1
# [2,] 1    -3     -2 
# [3,] 3    -1     -1  

b
#    [,1]   
# [1,]-1
# [2,]-1
# [3,] 4

也就是说，如何以=为界拆分此字符串，从而得到两个矩阵：一个由等号前面的数字组成，另一个由等号后面的数字组成？

EDIT。到目前为止，我做到了这一点：

# Step-by-step rewrite of the equation string into comma-separated numbers,
# then a split at "=" and ";".
my_string<-"-x+2y+z=-1; x-3y-2z=-1; 3x-y-z=4"    
# Insert "1" between a punctuation/whitespace character and the lowercase
# letter that follows it, so a bare variable gets an explicit coefficient.
my_string<-gsub('([[:punct:]]|\\s)([a-z])', '\\11\\2', my_string)   
# Replace every lowercase letter (the variable names) with a space.
my_string<-stringr::str_replace_all(my_string,"[a-z]"," ")    
# Remove the space that follows each semicolon.
my_string<-stringr::str_replace_all(my_string,"; ",";")    
# Turn every "-" into "+-" ...
my_string<-stringr::str_replace_all(my_string,"[-]","+-")       
# ... and then every "+" into a space, which leaves the minus signs in place.
my_string<-stringr::str_replace_all(my_string,"[+]"," ")    
# Remove the space that now follows each "=".
my_string<-stringr::str_replace_all(my_string,"[=] ","=")    
# Collapse runs of three, then two, then one space into a comma.
my_string<-stringr::str_replace_all(my_string,"   ",",")    
my_string<-stringr::str_replace_all(my_string,"  ",",")    
my_string<-stringr::str_replace_all(my_string," ",",")    
# Drop a comma at the very start of the string.
my_string<-gsub("^,","",my_string)        
# Split at every "=" or ";"; strsplit() returns a list holding the pieces.
my_string <- strsplit(my_string, "=|;")

我获得了：

# "-1,2,1"  "-1"      "1,-3,-2" "-1"      "3,-1,-1" "4"

如何连接这些字符串？

> A <- "-1,2,1,1,-3,-2,3,-1,-1"
> b <- "-1,-1,4"

Answer 1

以下是一些备选方案。所有方案都可以处理问题中所示的my_string这类字符串，但（3）、（4）和（5）还可以处理某些变量缺失或变量顺序不一致的方程。只有（4）硬编码了变量名，而（5）是它的通用化版本。

1）在每个没有数字系数的变量之前插入1，得到s1。然后提取变量名（假设每个变量名都是单个字母），并统计不同变量名的个数，得到n。接着提取所有数字，把它们转换为数值，并利用n把它们整形为矩阵。这里假设每个方程中三个变量都出现，并且顺序相同，问题中的例子正是这种情况。

library(gsubfn)

my_string<-"-x+2y+z=-1; x-3y-2z=-1; 3x-y-z=4"
# Give every bare variable an explicit coefficient of 1, e.g. "-x" -> "-1x".
s1 <- gsub('(^|\\W)([a-z])', '\\11\\2', my_string) # from your prior question

# n is the number of distinct one-letter variable names.
n <- length(strapplyc(s1, "[a-z]", simplify = unique))
# Every equation contributes n coefficients plus its right-hand side, so the
# row width is n + 1. Fixing the width (rather than using n as the row count)
# also works when the number of equations differs from the number of variables.
matrix(strapply(s1, "(-?\\d+)", as.numeric, simplify = c), ncol = n + 1, byrow = TRUE)

，并提供：

     [,1] [,2] [,3] [,4]
[1,]   -1    2    1   -1
[2,]    1   -3   -2   -1
[3,]    3   -1   -1    4

2）一种变体是把上面的s1按分号拆分，得到s2。然后使用strapply挑选出其中的数字，得到mat。最后把这些数字从字符型转换为数值型。

library(gsubfn)

# s1 is the string with explicit coefficients built in variant (1).
# strsplit() on a single string gives a one-element list of equation strings.
s2 <- strsplit(s1, ";")

# strapply() yields one character vector of signed numbers per equation;
# binding those vectors row-wise gives a character matrix.
numbers_by_equation <- strapply(s2[[1]], "(-?\\d+)")
mat <- do.call("rbind", numbers_by_equation)
# Same shape, numeric storage.
matrix(as.numeric(mat), nrow(mat))

，并提供：

     [,1] [,2] [,3] [,4]
[1,]   -1    2    1   -1
[2,]    1   -3   -2   -1
[3,]    3   -1   -1    4

3）此备选方案可以处理缺失的变量，例如下面的示例中第一个等式中缺少y的情况。 varnames是变量名。 extr函数采用变量名称并提取其系数，如果变量未出现，则采用0。

library(gsubfn)

my_string2 <- "-x+z=-1; x-3y-2z=-1; 3x-y-z=4"
# Give every bare variable an explicit coefficient of 1, e.g. "-x" -> "-1x".
s1 <- gsub('(^|\\W)([a-z])', '\\11\\2', my_string2)
s2 <- strsplit(s1, ";")

# Distinct one-letter variable names, sorted.
varnames <- sort(strapplyc(s1, "[a-z]", simplify = unique))
# Coefficient of variable `x` in every equation, 0 where it does not appear.
# The capture group passes only the signed number to as.numeric(), so the
# minus sign is kept and multi-digit coefficients are read whole. Stripping
# every non-digit from the match would turn "-3y" into 3.
# simplify = c returns a plain numeric vector, so A below is a numeric matrix.
extr <- function(x)
  strapply(s2[[1]], paste0("(-?\\d+)", x), as.numeric, empty = 0, simplify = c)
# One column per variable, one row per equation. For the equations above this
# is rbind(c(-1, 0, 1), c(1, -3, -2), c(3, -1, -1)).
A <- sapply(varnames, extr)
# Right-hand sides: everything after "=" in each equation.
b <- as.numeric(sub(".*=", "", s2[[1]]))

，并提供：

> A
     x y z
[1,] 1 0 1
[2,] 1 3 2
[3,] 3 1 1
> b
[1] -1 -1  4

4）这个方案把x替换为*c(1, 0, 0)，把y替换为*c(0, 1, 0)，把z替换为*c(0, 0, 1)，然后对结果求值得到A。它特别简单，还可以处理并非所有变量都出现的方程。它假设变量为x、y和z，不过可以推广。

my_string2 <- "-x+z=-1; x-3y-2z=-1; 3x-y-z=4"
# Make implicit coefficients explicit ("-x" -> "-1x"), then split per equation.
s1 <- gsub('(^|\\W)([a-z])', '\\11\\2', my_string2)
s2 <- strsplit(s1, ";")

# Keep only the left-hand sides.
s <- sub("=.*", "", s2[[1]])

# Each variable becomes "times its unit vector", so that a left-hand side such
# as "1x-3y-2z" turns into R code that evaluates to the row c(1, -3, -2).
unit_vectors <- c(x = "*c(1, 0, 0)", y = "*c(0, 1, 0)", z = "*c(0, 0, 1)")
for (v in names(unit_vectors)) {
  s <- gsub(v, unit_vectors[[v]], s)
}

# Evaluate all rows inside a single rbind() call.
rows <- paste(s, collapse = ",")
A <- eval(parse(text = paste("rbind(", rows, ")")))

# Right-hand sides: everything after "=".
b <- as.numeric(sub(".*=", "", s2[[1]]))

，并提供：

> A
     [,1] [,2] [,3]
[1,]   -1    0    1
[2,]    1   -3   -2
[3,]    3   -1   -1
> b
[1] -1 -1  4

5）这是（4）的通用版本，其中x、y和z不再硬编码。它可以处理顺序不一致和缺失的变量。我们首先把变量名存入varnames，把输入字符串拆分为spl；对第i个变量名，把它替换为一个第i个位置为1、其余位置为0的向量，得到ss1；在任何以数字为前缀的此类向量之前插入*，得到ss2；删除=及其之后的所有内容，得到ss3，再用rbind(...)将其包围，并把它作为R表达式求值，得到A。b是=之后的所有内容转换为数值后的结果。

，并提供：

# Generalized form of variant (4): variable names are discovered from the
# input instead of being hard-coded.
library(gsubfn)
my_string2 <- "-z+x=-1; x-3y-2z=-1; 3x-y-z=4"
ss0 <- my_string2
# Distinct lowercase letters found in the input, sorted.
varnames <- sort(strapplyc(ss0, "[a-z]", simplify = unique))
# One string per equation.
spl <- strsplit(ss0, ";")[[1]]
# For each letter, the formula computes a 0/1 vector with a 1 at the letter's
# position in varnames; gsubfn() substitutes the result for the letter.
# NOTE(review): the next step looks for a digit followed by "c", so the
# substituted text is presumably of the form "c(...)" -- confirm how gsubfn
# renders a numeric vector returned by the replacement function.
ss1 <- gsubfn("[a-z]", x ~ (match(x, varnames) == seq_along(varnames))+0, spl)
# Insert "*" between a digit and the "c" that follows it.
ss2 <- gsub("(\\d)c", "\\1*c", ss1)
# Drop "=" and everything after it, keeping the left-hand sides.
ss3 <- sub("=.*", "", ss2)
# Evaluate the left-hand sides as the arguments of one rbind() call.
A <- eval(parse(text = paste("rbind(", paste(ss3, collapse = ","), ")")))
# Right-hand sides: everything after "=".
b <- as.numeric(sub(".*=", "", ss2))

Answer 2

仅限基础R.有点丑陋，对strsplit和*apply函数的调用过多。

my_string <- "-x+2y+z=-1; x-3y-2z=-1; 3x-y-z=4"

# One "lhs=rhs" string per equation, then each split into its two sides.
equations <- strsplit(my_string, ";")[[1]]
sides <- strsplit(equations, "=")

# Right-hand sides as numbers.
b <- as.numeric(vapply(sides, function(p) p[[2]], character(1)))

# Turn one left-hand side, e.g. " x-3y-2z", into its coefficients c(1, -3, -2).
lhs_coefficients <- function(lhs) {
  # A sign directly followed by a letter hides a coefficient of 1.
  lhs <- gsub("([-+])([[:alpha:]])", "\\11\\2", lhs)
  # So does a letter at the very start once surrounding blanks are removed.
  lhs <- sub("^([[:alpha:]])", "1\\1", trimws(lhs))
  # What remains between the letters are the signed coefficients.
  as.numeric(strsplit(lhs, "[[:alpha:]]")[[1]])
}

# One row of coefficients per equation.
A <- do.call(rbind, lapply(sides, function(p) lhs_coefficients(p[[1]])))

Answer 3

这是一个基本版本，具有相当简单的正则表达式：

mystring <- "-x+2y+z=-1; x-3y-2z=-1; 3x-y-z=4"
equations <- strsplit(mystring, '; ')[[1]]    # split equations
# Split on runs of letters and "=", so any variable letters work, not only
# x, y and z. What is left are the coefficient strings and the right-hand side.
coefs <- strsplit(equations, '[[:alpha:]=]+')    # split into list of vectors of coefficients

# iterate over coefficients, clean, and simplify
Ab <- t(sapply(coefs, function(x) {
  missing1 <- !grepl('\\d', x)    # detect coefficients with no numbers ("", "+", "-")
  x[missing1] <- paste0(x[missing1], '1')    # paste ones on those
  as.numeric(x)    # coerce from strings (substitute `as.integer` if suitable)
}))

Ab
#>      [,1] [,2] [,3] [,4]
#> [1,]   -1    2    1   -1
#> [2,]    1   -3   -2   -1
#> [3,]    3   -1   -1    4

# The last column is the right-hand side and everything before it is A, however
# many unknowns there are. drop = FALSE keeps both results as matrices.
A <- Ab[, -ncol(Ab), drop = FALSE]
b <- Ab[, ncol(Ab), drop = FALSE]

A
#>      [,1] [,2] [,3]
#> [1,]   -1    2    1
#> [2,]    1   -3   -2
#> [3,]    3   -1   -1

b
#>      [,1]
#> [1,]   -1
#> [2,]   -1
#> [3,]    4

solve(A, b)
#>      [,1]
#> [1,]    2
#> [2,]   -1
#> [3,]    3

如果您愿意，可以使用do.call(rbind, lapply(...))代替t(sapply(...))。请注意，正则表达式之所以简单，是因为方程的形式很规整；如果各项顺序错乱或有缺失，您将需要更健壮的解决方案。

Answer 4

已经解决了，但我只是想发布我的

my_string <- "-x+2y+z=-1; x-3y-2z=-1; 3x-y-z=4"
# One c(lhs, rhs) pair per equation.
split <- strsplit(strsplit(my_string, ";")[[1]], "=")
# Right-hand sides as a one-column matrix.
right <- do.call(rbind, lapply(split, function(x) as.numeric(x[[2]])))

# Coefficients of one left-hand side, e.g. " x-3y-2z" -> c(1, -3, -2).
lhs_coefficients <- function(x) {
  # Terms are the pieces between non-word characters: "x", "3y", "2z".
  terms <- unlist(strsplit(x, "\\W"))
  terms <- terms[terms != ""]
  # Signs are the pieces between word characters. The first piece is always
  # kept (it is the sign, blank or nothing in front of the first term); later
  # empty pieces only mark the gap between a digit and its letter.
  # signs[-1] is safe for a one-term side, where 2:length(signs) would count
  # backwards and index past the end.
  signs <- unlist(strsplit(x, "\\w"))
  signs <- signs[c(TRUE, signs[-1] != "")]
  # Magnitude is 1 unless the term carries digits. \\d+ reads the whole
  # number, so "12x" gives 12 rather than 1.
  magnitude <- rep(1, length(terms))
  has_digit <- grepl("\\d", terms)
  magnitude[has_digit] <-
    as.numeric(sub(".*?(\\d+).*", "\\1", terms[has_digit], perl = TRUE))
  # paste0(sign, 1) gives "-1", "+1", " 1" or "1", i.e. the numeric sign.
  magnitude * as.numeric(paste0(signs, 1))
}

# One row of coefficients per equation.
left <- do.call(rbind, lapply(split, function(x) lhs_coefficients(x[[1]])))

方程组。如何拆分字符串以获得R中的两个矩阵A和b

4 个答案: