如何解决简单GA(Python)中的早熟收敛?

时间:2011-06-28 19:49:17

标签: python artificial-intelligence genetic-algorithm genetic-programming

昨天我开始探索遗传算法,当我最终得到一些基本理论时,我试图在Python上编写简单的GA,它解决了丢番图方程。我是Python和GA的新手,所以请不要严格判断我的代码。

问题

由于早熟收敛,我得不到任何结果(有一些无返回点(n-population),population [n] == population [n + i],其中i是任意整数。甚至是随机变换元素不能改变这一点,这一代很快就会退化了)

GA正在使用交叉进行繁殖,并加权选择父母。

  • Q1:我的设计错误是否存在? 代码(下)?
  • Q1.2:我是否需要添加精英主义?
  • Q1.3:我需要改变品种吗? 逻辑?
  • Q2:真的需要深层复制吗?

代码:

# -*- coding: utf-8 -*-
from random import randint
from copy import deepcopy
from math import floor
import random

class Organism:
    #initiate
    def __init__(self, alleles, fitness, likelihood):
        self.alleles = alleles
        self.fitness = fitness
        self.likelihood = likelihood
        self.result = 0
    def __unicode__(self):
        return '%s [%s - %s]' % (self.alleles, self.fitness, self.likelihood)

class  CDiophantine:
    def __init__(self, coefficients,  result):
        self.coefficients = coefficients
        self.result = result

    maxPopulation = 40
    organisms = []
    def GetGene (self,i):
        return self.organisms[i]

    def OrganismFitness (self,gene):
        gene.result = 0
        for i in range (0, len(self.coefficients)):
            gene.result += self.coefficients[i]*gene.alleles[i]
        gene.fitness = abs(gene.result - self.result)
        return gene.fitness

    def Fitness (self):
        for organism in self.organisms:
            organism.fitness = self.OrganismFitness(organism)
            if organism.fitness == 0:
                return  organism
        return None


    def MultiplyFitness (self):
        coefficientSum = 0
        for organism in self.organisms:
            coefficientSum += 1/float(organism.fitness)
        return coefficientSum

    def GenerateLikelihoods (self):
        last = 0
        multiplyFitness = self.MultiplyFitness()
        for organism in self.organisms:
            last = ((1/float(organism.fitness)/multiplyFitness)*100)
            #print '1/%s/%s*100 - %s' % (organism.fitness, multiplyFitness, last)
            organism.likelihood = last

    def Breed (self, parentOne, parentTwo):
        crossover = randint (1,len(self.coefficients)-1)
        child = deepcopy(parentOne)
        initial = 0
        final = len(parentOne.alleles) - 1
        if randint (1,100) < 50:
            father = parentOne
            mother = parentTwo
        else:
            father = parentTwo
            mother = parentOne
        child.alleles = mother.alleles[:crossover] + father.alleles[crossover:]
        if randint (1,100) < 5:
            for i in range(initial,final):    
                child.alleles[i] = randint (0,self.result)

        return child

    def CreateNewOrganisms (self):
        #generating new population
        tempPopulation = []
        for _ in self.organisms:
            iterations = 0
            father = deepcopy(self.organisms[0])
            mother = deepcopy(self.organisms[1])
            while father.alleles == mother.alleles:
                father = self.WeightedChoice()
                mother = self.WeightedChoice()
                iterations+=1
                if iterations > 35:
                    break
            kid = self.Breed(father,mother)
            tempPopulation.append(kid)
        self.organisms = tempPopulation

    def WeightedChoice (self):
        list = []
        for organism in self.organisms:
            list.append((organism.likelihood,organism))
        list = sorted((random.random() * x[0], x[1]) for x in list)
        return list[-1][1]


    def AverageFitness (self):
        sum = 0
        for organism in self.organisms:
            sum += organism.fitness
        return float(sum)/len(self.organisms)

    def AverageLikelihoods (self):
        sum = 0
        for organism in self.organisms:
            sum += organism.likelihood
        return sum/len(self.organisms)

    def Solve (self):
        solution = None
        for i in range(0,self.maxPopulation):
            alleles = []
            #
            for j in range(0, len(self.coefficients)):
                alleles.append(randint(0, self.result))
            self.organisms.append(Organism(alleles,0,0))
        solution = self.Fitness()
        if solution:
            return solution.alleles
        iterations = 0
        while not solution and  iterations <3000:
            self.GenerateLikelihoods()
            self.CreateNewOrganisms()
            solution = self.Fitness()
            if solution:
                print 'SOLUTION FOUND IN %s ITERATIONS' % iterations
                return solution.alleles
            iterations += 1
        return  -1

if __name__ == "__main__":
    diophantine = CDiophantine ([1,2,3,4],30)
    #cProfile.run('diophantine.Solve()')
    print diophantine.Solve()

尝试更改品种和加权随机选择逻辑,但没有结果。这个GA应该是工作,我不知道,什么是错的。 我知道Python上有一些GA库,我现在正试图理解它们 - 看起来它们对我来说非常复杂。抱歉错误,英语不是我的母语。谢谢你的理解。

NECROUPDATE: 将染色体存储在格雷码中,而不是整数。

1 个答案:

答案 0 :(得分:3)

轻微的逻辑错误:parentTwo比母亲更可能是父亲。甚至赔率是randint(1,100)<= 50,而不是randint(1,100)&lt; 50.这不会是造成这个问题的原因。

  1. 您的人口规模相当小。 40对于大多数问题来说很少。这将使它快速收敛。
  2. 精英主义会让你的人口更快收敛,而不是更慢。
  3. 如果我正确阅读,您的WeightedChoice功能似乎效率很低。我最近没有使用Python来真正理解那里发生了什么,但看着它肯定会感觉效率低下。如果你可以改进,它应该加快处理速度,以便你可以增加人口规模(并且,因为我正在计算你的算法可能至少有O(n ^ 2),那将是真的很重要。)
  4. 如此庞大的人口规模,200-300代人解决这个问题就不足为奇了。如果增加人口,就应该减少所需的世代。

    注意:我在几年前发现了一些用于解决类似问题的旧代码。它在C中,并使用锦标赛选择,但也许它可以给你一些想法:

    /*Diophantine equation solving genetic algorithm
    Copyright (C) 2009- by Joel Rein
    Licensed under the terms of the MIT License*/
    #include <stdio.h>
    #include <stdlib.h>
    #include <time.h>
    #define POP 100
    //number of variables to solve for
    #define VAR 4
    //maximum value for a) result and b) variables
    #define MAX 100 
    #define MAX_GENS 500
    //probability of crossover (otherwise just one parent will be used)
    #define CROSSOVER 0.7
    //probability of mutation (per gene)
    #define MUTATION 0.4
    //print out debug information each generation (recommended: if used, keep RUNS low)
    #define DEBUG
    //print result of each run individually
    #define PRINT_RESULT
    //how many times to run the GA
    #define RUNS 1
    
    int pop[POP][VAR], scores[POP], new[POP][VAR];
    int coefficients[VAR];
    int result=0;
    
    int score(int index){
        int sum=0;
        for(int i=0;i<VAR;i++)
            sum+=coefficients[i]*pop[index][i];
        return abs(sum-result);
    }
    
    int tournament(int size){
        int best=rand()%POP;
        for(int i=1;i<size;i++){
            int comp=rand()%POP;
            if(scores[comp]<scores[best])
                best=comp;
        }
        return best;
    }
    
    void breed(int target){
        int a=tournament(3), b=tournament(3);
        //copy a
        for(int i=0;i<VAR;i++)
            new[target][i]=pop[a][i];
        //crossover
        if((float)rand()/RAND_MAX<CROSSOVER){
            int x=rand()%VAR;
            for(int i=x;i<VAR;i++)
                new[target][i]=pop[b][i];
        }
        //mutation
        for(int i=0;i<VAR;i++)
            if((float)rand()/RAND_MAX<MUTATION)
                new[target][i]=rand()%(result*2)-result;
    }
    
    void debug(int gen, int best){
    #ifdef DEBUG
        printf("Gen: %3i Score: %3i --- ", gen, scores[best]);
        int sum=0;
        for(int i=0;i<VAR;i++){
            sum+=coefficients[i]*pop[best][i];
            printf("%3i*%3i+", coefficients[i], pop[best][i]);
        }
        printf("0= %3i (target: %i)\n", sum, result);
    #endif
    }
    
    int ga(int run){
        srand(time(NULL)+run);
        //calculate a result for the equation. 
        //this mustn't be 0, else we get division-by-zero errors while initialising & mutating.
        while(!result)
            result=rand()%MAX;
        for(int i=0;i<VAR;i++)
            coefficients[i]=rand()%result;
        //initialise population
        for(int i=0;i<POP;i++)
            for(int j=0;j<VAR;j++)
                pop[i][j]=rand()%(result*2)-result;
        //main loop
        int gen, best;
        for(gen=0;gen<MAX_GENS;gen++){
            best=0;
            //evaluate population
            for(int i=0;i<POP;i++){
                scores[i]=score(i);
                if(scores[i]<scores[best])
                    best=i;
            }
            debug(gen, best);
            if(scores[best]==0)
                break;
            //breed and replace
            for(int i=0;i<POP;i++)
                breed(i);
            for(int i=0;i<POP;i++)
                for(int j=0;j<VAR;j++)
                    pop[i][j]=new[i][j];
        }
    #ifdef PRINT_RESULT
        printf("Terminated after %4i generations with a score of %3i\n", gen, scores[best]); 
    #else
        printf(".");
    #endif
        return gen;
    }
    
    int main(){
        int total=0;
        for(int i=0;i<RUNS;i++)
            total+=ga(i);
        printf("\nAverage runtime: %i generations\n", total/RUNS);
    }