Question

是否有高效的R-package来处理以下问题：

我有一组数值观察（数千的N），范围从一百万到一百万。给定目标值并且舍入准确度是线性组合，权重-1（减法）/ 0（遗漏）/ 1（加法）使得总和等于舍入误差内的目标值并且还呈现权重？

Answer 1

以下是我根据您的情况修改过的遗传算法，有关算法的解释，请参阅my answer there。可能有（当然）方法用较少的代码解决您的问题，但我已经在架子上有这个解决方案，并且适应它很简单。所需的输入是data.frame，其中包含列值和列权重，可以全为零：

     value weights
1       45       0
2       33       0
3       47       0
4       65       0
5       12       0
6       43       0
7        5       0
...     ...      ...

然后，算法将从集c(-1,0,1)中找到一组权重，使其为

的值

abs(target_value - sum(final_solution$value*final_solution$weights))

被最小化。

肯定还有改进的余地，例如权重现在完全随机设置，因此初始解决方案的预期加权总和始终为0.如果target_value非常高，则最好将1分配给更高的概率比-1，更快地收敛到最优解。

对于这种情况，似乎效果很好，100000个对象和目标值12000，它会在几分之一秒内找到最佳解决方案：

代码：

### PARAMETERS -------------------------------------------

n_population = 100 # the number of solutions in a population
n_iterations = 100 # The number of iterations
n_offspring_per_iter = 80 # number of offspring to create per iteration
frac_perm_init = 0.25 # fraction of columns to change from default solution while creating initial solutions
early_stopping_rounds = 100 # Stop if score not improved for this amount of iterations


### SAMPLE DATA -------------------------------------------------

n_objects = 100000
datain =data.frame(value=round(runif(n_objects,0,100)),weights = 0))

target_value=12000


### ALL OUR PREDEFINED FUNCTIONS ----------------------------------

# Score a solution
# We calculate the score by taking the sum of the squares of our overcapacity (so we punish very large overcapacity on a day)
score_solution <- function(solution,target_value)
{
  abs(target_value-sum(solution$value*solution$weights))
}

# Merge solutions
# Get approx. 50% of tasks from solution1, and the remaining tasks from solution 2.
merge_solutions <- function(solution1,solution2)
{
  solution1$weights = ifelse(runif(nrow(solution1),0,1)>0.5,solution1$weights,solution2$weights)
  return(solution1)
}

# Randomize solution
# Create an initial solution
randomize_solution <- function(solution)
{
  solution$weights = sample(c(-1,0,1),nrow(solution),replace=T)
  return(solution)
}

# sort population based on scores
sort_pop <- function(population)
{
  return(population[order(sapply(population,function(x) {x[['score']]}),decreasing = F)])
}

# return the scores of a population
pop_scores <- function(population)
{
  sapply(population,function(x) {x[['score']]})
}



### RUN SCRIPT -------------------------------

# starting score
print(paste0('Starting score: ',score_solution(datain,target_value)))

# Create initial population
population = vector('list',n_population)
for(i in 1:n_population)
{
  # create initial solutions by making changes to the initial solution 
  solution = randomize_solution(datain)
  score = score_solution(solution,target_value)
  population[[i]] = list('solution' = solution,'score'= score)
}

population = sort_pop(population)

score_per_iteration <- score_solution(datain,target_value)
# Run the algorithm
for(i in 1:n_iterations)
{
  print(paste0('\n---- Iteration',i,' -----\n'))
  # create some random perturbations in the population
  for(j in 1:10)
  {
    sol_to_change = sample(2:n_population,1)
    new_solution <- randomize_solution(population[[sol_to_change]][['solution']])
    new_score <- score_solution(new_solution,target_value)
    population[[sol_to_change]] <- list('solution' = new_solution,'score'= new_score)
  }

  # Create offspring, first determine which solutions to combine
  # determine the probability that a solution will be selected to create offspring (some smoothing)
  probs = sapply(population,function(x) {x[['score']]})
  if(max(probs)==min(probs)){stop('No diversity in population left')}
  probs = 1-(probs-min(probs))/(max(probs)-min(probs))+0.2
  # create combinations
  solutions_to_combine = lapply(1:n_offspring_per_iter, function(y){
    sample(seq(length(population)),2,prob = probs)})
  for(j in 1:n_offspring_per_iter)
  {
    new_solution <- merge_solutions(population[[solutions_to_combine[[j]][1]]][['solution']],
                                    population[[solutions_to_combine[[j]][2]]][['solution']])
    new_score <- score_solution(new_solution,target_value)
    population[[length(population)+1]] <- list('solution' = new_solution,'score'= new_score)
  }
  population = sort_pop(population)
  population= population[1:n_population]
  print(paste0('Best score:',population[[1]]['score']))
  score_per_iteration = c(score_per_iteration,population[[1]]['score'])
  if(i>early_stopping_rounds+1)
  {
    if(score_per_iteration[[i]] == score_per_iteration[[i-10]])
    {
      stop(paste0("Score not improved in the past ",early_stopping_rounds," rounds. Halting algorithm."))
    }
  }
}

plot(x=seq(0,length(score_per_iteration)-1),y=score_per_iteration,xlab = 'iteration',ylab='score')
final_solution = population[[1]][['solution']]

是否有R包用于解决-1/0/1背包？

1 个答案: