I am self-studying Caltech's "Learning From Data" course through the video lectures and homework problems. I am stuck on a homework problem: my code seems to work, but the answer I get does not match the one given in the solution key. The problem is as follows.
In the following problems, we compare PLA to SVM with hard margin on linearly separable data sets. For each run, you will create your own target function $f$ and data set $\mathcal{D}$. Take $d = 2$ and choose a random line in the plane as your target function $f$ (do this by taking two random, uniformly distributed points on $[-1,1] \times [-1,1]$ and taking the line passing through them), where one side of the line maps to $+1$ and the other maps to $-1$. Choose the inputs $x_n$ of the data set as random points in $\mathcal{X} = [-1,1] \times [-1,1]$, and evaluate the target function on each $x_n$ to get the corresponding output $y_n$. If all data points are on one side of the line, discard the run and start a new run. Start PLA with the all-zero vector and pick the misclassified point for each PLA iteration at random. Run PLA to find the final hypothesis $g_{\mathrm{PLA}}$ and measure the disagreement between $f$ and $g_{\mathrm{PLA}}$ as $P[f(x) \neq g_{\mathrm{PLA}}(x)]$ (you can either calculate this exactly, or approximate it by generating a sufficiently large, separate set of points to evaluate it). Now, run SVM on the same data to find the final hypothesis $g_{\mathrm{SVM}}$ by solving $$\min_{w,b}\ \tfrac{1}{2} w^T w \quad \text{s.t.} \quad y_n (w^T x_n + b) \ge 1$$ using quadratic programming on the primal or the dual problem. Measure the disagreement between $f$ and $g_{\mathrm{SVM}}$ as $P[f(x) \neq g_{\mathrm{SVM}}(x)]$, and count the number of support vectors you get in each run.
These problems require running 1000 iterations of the experiment for $N = 10$ and $N = 100$ in-sample data points, and computing the fraction of the time the SVM approach performs better than the PLA approach, as well as the average number of support vectors.
My attempt: I tried quadratic programming in R using the quadprog package, solving the dual of the given optimization problem. For $N = 100$ I get the correct answer for the average number of support vectors (3), but my answers for the fraction of runs in which SVM performs better than PLA (about 90% for $N = 100$, about 80% for $N = 10$) overshoot the solution key's answers (70% for $N = 100$, 60% for $N = 10$). Is there a glitch in my code or in my understanding? I was quite careful in implementing solve.QP, and I have commented each step to show what I intend to do.
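For reference, the standard hard-margin dual that I set up for solve.QP (with the primal weights recovered afterwards as $w = \sum_{n=1}^{N} \alpha_n y_n x_n$) is:

$$\max_{\alpha}\ \sum_{n=1}^{N} \alpha_n - \frac{1}{2}\sum_{n=1}^{N}\sum_{m=1}^{N} \alpha_n \alpha_m y_n y_m x_n^T x_m \quad \text{s.t.} \quad \sum_{n=1}^{N} y_n \alpha_n = 0, \quad \alpha_n \ge 0 \ \text{for all } n.$$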
library(quadprog) # provides solve.QP, used for the SVM below
N = 10 # (resp. N = 100)
runs = 1 # Initializing the number of runs
count = 0 # Count of number of times Eout for SVM < Eout for PLA (SVM performs better)
sv = matrix(0, nrow = 1, ncol = 1000) # number of support vectors in each run
while(runs<1001){
x <- runif(2, min = -1, max = 1) # x-coordinates of two random points in [-1, 1] x [-1, 1]
y <- runif(2, min = -1, max = 1) # y-coordinates of the same two points
fit = lm(y ~ x) # line through the two points (an exact fit, since there are only two)
cf = summary(fit)$coefficients[, 1] # intercept and slope (named "cf" so as not to shadow base::t)
f <- function(x){ # A random line in the plane
cf[2]*x + cf[1]
}
A = matrix(ncol=N, nrow=2) # Training data set, generated randomly, as required by question
b = matrix(ncol=N, nrow=1)
for(i in 1:N){
A[, i] <- c(runif(2, min = -1, max = 1))
b[1, i] <- sign(A[2, i] - f(A[1, i]))
}
if(length(unique(as.vector(b))) == 1){next} # Move to the next iteration if all training points lie on the same side of the line
w <- matrix(ncol=1, nrow=3) # Weight vector, to be calculated using PLA
w[, 1] = 0
g <- function(z){
t(w) %*% z
}
i = 1
while(i < N+1){ # Running the Perceptron Learning Algorithm
j = sample(1:N, 1)
if(sign(g(c(1, A[, j]))) != b[1, j]){ # update only if the randomly chosen point is misclassified
w = w + b[1, j]*c(1, A[, j])
}
i = i + 1
}
S = matrix(ncol=10000, nrow=2) # Testing data set, random points in XY plane
for(v in 1:10000){
S[, v] <- c(runif(2, min = -10000, max = 10000))
}
m = 0 # counter of Eout(PLA)
v = 1
while(v < 10001){
if(sign(g(c(1, S[, v]))) != sign(S[2, v] - f(S[1, v]))){
m = m + 1
}
v = v + 1
}
# SVM
# Implementing classification using the quadprog package, solve.QP command
Dmat <- matrix(nrow = N, ncol = N)
dvec <- matrix(nrow = N, ncol = 1)
Amat <- matrix(nrow= N+1, ncol = N)
Amat[,] = 0
bvec= matrix(nrow = N+1, ncol =1)
bvec[,1]=0
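# How solve.QP's canonical form maps onto the SVM dual:
# solve.QP minimizes (1/2) a^T Dmat a - dvec^T a subject to t(Amat) a >= bvec,
# treating the first meq constraints as equalities. Here Dmat[i,j] = y_i y_j x_i^T x_j,
# dvec = 1 (so minimizing the objective maximizes sum(a) minus the quadratic term),
# the single equality is y^T a = 0, and the remaining inequalities are a_i >= 0.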
for(i in 1:N){
for(j in 1:N){
Dmat[i, j] = b[i]*b[j]*t(A[,i])%*%A[,j]
dvec[i,1] = 1
if(i>1 && i==j+1) {
Amat[i,j] <- 1
}
else Amat[i,j] = 0
bvec[i] = 0
}
}
Dmat = Dmat + 10^(-8)*diag(N) # Small ridge so Dmat is numerically positive definite, as solve.QP requires
Amat[1,] = t(b)
Amat[N+1,N]=1 # Amat has its first row as the vector b (as one constraint is b^T*alpha = 0), and the rest as identity because we need all alpha_i's greater than or equal to 0
Amt = t(Amat)
sol <- solve.QP(Dmat, dvec, Amt, bvec, meq=1) #meq = 1 because the first inequality must be treated as an equality
alpha <- sol$solution # alpha records the obtained dual parameters
w1 = 0 # 2-component weight vector (excludes intercept term)
for(n in 1:N){
w1 = w1 + alpha[n]*b[n]*A[,n] # Formula for obtaining weights using alpha's
}
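# w1 = sum_n alpha_n * y_n * x_n: the standard recovery of the primal weights from the dual solution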
for(n0 in 1:N){
if(abs(alpha[n0]) > 10^(-1)){
break # Picking an index corresponding to an actual support vector
}
}
for(n in 1:N){
if(abs(alpha[n]) > 10^(-1) & alpha[n]>0){
sv[runs] = sv[runs] + 1 # Support vector counter for this run
}
}
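# Intercept from any support vector s: y_s*(w1^T x_s + w0) = 1, and since y_s = +/-1
# this rearranges to w0 = (1 - y_s * w1^T x_s)/y_s = y_s - w1^T x_s.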
w0 = (1-t(w1)%*%A[,n0]*b[n0])/b[n0] # Formula for first entry of final weight vector, corresponding to intercept term
w2 = c(w0,w1) # final weight vector
m1 = 0 # Eout counter for SVM
v1 = 1
g1 <- function(z){ #function: multiplication by the weight vector
t(w2) %*% z
}
while(v1 < 10001){ # Counting the number of errors
if(sign(g1(c(1, S[, v1]))) != sign(S[2, v1] - f(S[1, v1]))){
m1= m1 + 1
}
v1 = v1 + 1
}
if(m > m1){count = count + 1/1000} # SVM's error count beat PLA's this run; count accumulates the fraction of such runs
runs = runs + 1
}
count*100 # the percentage of runs in which SVM performed better
The code yields about 90% for N = 100 and about 80% for N = 10, but the solution key says 70% for N = 100 and 60% for N = 10.
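In case it is useful for anyone reproducing this: a minimal sanity check I have in mind (a sketch, assuming the e1071 package, which wraps libsvm; a very large cost approximates the hard margin, and the names w_check/b_check are mine) would be to compare the solve.QP weights against e1071 on the same A and b:

library(e1071)
# Hard margin approximated by a very large cost; scale = FALSE keeps the raw inputs
fit <- svm(x = t(A), y = as.factor(b[1, ]), type = "C-classification",
kernel = "linear", cost = 1e6, scale = FALSE)
w_check <- t(fit$coefs) %*% fit$SV # weights: sum over SVs of (alpha_i * y_i) * x_i
b_check <- -fit$rho # intercept (libsvm's decision function is w.x - rho)
# Up to a possible overall sign flip (e1071 maps the first factor level to +1),
# w_check and b_check should agree with w1 and w0 to numerical tolerance.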