我正在尝试使用 NEAT 算法解决 OpenAI Gym 的 BipedalWalker 问题
问题在下面 https://gym.openai.com/envs/BipedalWalker-v2/
我尝试使用 neat-python 解决此问题,但是即使经过 1000 代,问题仍未得到解决。我该怎么办?
avg_fitness
物种形成(speciation)
图
from __future__ import print_function
import os
import neat
import visualize
import gym
import numpy as np
from gym import wrappers
# Module-level environment instance; eval_genomes reads it as a global, so
# every genome evaluation goes through this same object.
env = gym.make("BipedalWalker-v2")
def eval_genomes(genomes, config, episodes=1):
    """Set each genome's fitness to its mean episode reward in ``env``.

    Args:
        genomes: Iterable of ``(genome_id, genome)`` pairs, as supplied by
            ``neat.Population.run``.
        config: NEAT configuration used to build each genome's network.
        episodes: Number of episodes to run and average per genome. The
            default of 1 matches the previously hard-coded value.

    Raises:
        ValueError: If ``episodes`` is smaller than 1.
    """
    if episodes < 1:
        raise ValueError("episodes must be >= 1")

    for _genome_id, genome in genomes:
        net = neat.nn.FeedForwardNetwork.create(genome, config)
        total_reward = 0.0
        for _ in range(episodes):
            # Use the observation returned by *this* reset. Previously the
            # result was bound to a misspelled name, so the first action of
            # the episode was computed from a stale observation.
            observation = env.reset()
            episode_reward = 0.0
            done = False
            while not done:
                # Clamp the network outputs to [-1, 1] before stepping.
                action = np.clip(net.activate(observation), -1, 1)
                observation, reward, done, _info = env.step(action)
                episode_reward += reward
            total_reward += episode_reward
        genome.fitness = total_reward / episodes
def run(config_file, generations=200):
    """Run NEAT with ``eval_genomes`` and visualise the result.

    Args:
        config_file: Path to the neat-python configuration file.
        generations: Maximum number of generations to evolve. The default
            of 200 matches the previously hard-coded value.

    Returns:
        The genome returned by ``neat.Population.run``.
    """
    # Load configuration.
    config = neat.Config(neat.DefaultGenome, neat.DefaultReproduction,
                         neat.DefaultSpeciesSet, neat.DefaultStagnation,
                         config_file)

    # Create the population, which is the top-level object for a NEAT run.
    p = neat.Population(config)

    # Report progress on stdout, collect statistics for the plots below,
    # and attach a checkpointer.
    p.add_reporter(neat.StdOutReporter(True))
    stats = neat.StatisticsReporter()
    p.add_reporter(stats)
    p.add_reporter(neat.Checkpointer(5))

    # Run for up to `generations` generations.
    winner = p.run(eval_genomes, generations)

    # Display the winning genome.
    print('\nBest genome:\n{!s}'.format(winner))
    print('\nOutput:')

    # Draw the winning network and plot the collected statistics.
    visualize.draw_net(config, winner, True)
    visualize.plot_stats(stats, ylog=False, view=True)
    visualize.plot_species(stats, view=True)

    return winner
if __name__ == '__main__':
    # Resolve the configuration file relative to this script's own
    # directory, so the run does not depend on the current working directory.
    config_path = os.path.join(os.path.dirname(__file__), 'config-feedforward')
    run(config_path)
以下是超参数配置(config-feedforward):
fitness_criterion = mean
fitness_threshold = 300
pop_size = 100
reset_on_extinction = True
no_fitness_termination = 1
activation_default = relu
activation_mutate_rate = 0.3
activation_options = sigmoid
aggregation_default = sum
aggregation_mutate_rate = 0.3
aggregation_options = mean
bias_init_mean = 0
bias_init_stdev = 1.0
bias_max_value = 100
bias_min_value = -100
bias_mutate_power = 0.5
bias_mutate_rate = 0.7
bias_replace_rate = 0.1
compatibility_disjoint_coefficient = 1.0
compatibility_weight_coefficient = 0.5
conn_add_prob = 0.5
conn_delete_prob = 0.5
enabled_default = True
enabled_mutate_rate = 0.01
feed_forward = True
initial_connection = full
node_add_prob = 0.3
node_delete_prob = 0.3
num_hidden = 0
num_inputs = 24
num_outputs = 4
response_init_mean = 2
response_init_stdev = 1
response_max_value = 5.0
response_min_value = -5.0
response_mutate_power = 0.0
response_mutate_rate = 0.0
response_replace_rate = 0.0
weight_init_mean = 0.5
weight_init_stdev = 1.0
weight_max_value = 30
weight_min_value = -30
weight_mutate_power = 0.5
weight_mutate_rate = 0.8
weight_replace_rate = 0.1
compatibility_threshold = 3.0
species_fitness_func = mean
max_stagnation = 20
species_elitism = 3
elitism = 3
survival_threshold = 0.3