基于代理的模型python

时间:2019-06-28 15:05:46

标签: python performance numpy oop game-theory

我最初将这个问题发布在 Code Review 上(所以代码比较长),但没有得到回答。

我的模型基于最后通牒博弈(Ultimatum game,见 https://en.wikipedia.org/wiki/Ultimatum_game )。我不会详述全部细节,但总的来说,它的运作方式如下:

  1. 游戏由一个n x n的格子组成,在每个节点上放置一个代理。

  2. 在每个时间步中,每个节点上的玩家都按照某个特定策略与一个随机选取的邻居进行一次博弈。

  3. 他们的每项策略(取值为1到9之间的整数)都有一个倾向(随机分配的数值)。倾向决定了执行该策略的概率:某一策略的概率等于该策略的倾向除以所有策略的倾向总和。

  4. 如果一个游戏产生正收益,那么该游戏的收益将被添加到这些策略的倾向中。

  5. 然后,这些倾向确定下一步策略的可能性,依此类推。

  6. 在达到时间步长N之后,模拟结束。

对于网格较大、时间步数较多的游戏,我的代码运行得非常慢。我用 cProfile 检查了瓶颈所在,结果显示 update_probabilities 和 play_rounds 这两个函数似乎占用了大量时间。我希望能够在约 40x40 的网格上运行约 100000 个以上的时间步,但目前还做不到。

那么计算和更新网格中每个玩家的概率/倾向的更有效方法是什么?我已经考虑过实现NumPy数组,但是我不确定在这里是否值得麻烦?


import numpy as np
import random
from random import randint
from numpy.random import choice
from numpy.random import multinomial
import cProfile

# Decay rate: on every probability update each propensity is multiplied by
# (1 - mew), which stops the sum of propensities from growing without bound.
mew = 0.001 
# Fraction of a positive payoff that get_error_term diverts from the played
# strategy to its adjacent strategies (error/2 to each adjacent strategy).
error = 0.05

def create_grid(row, col):
    """Return a ``row`` x ``col`` nested list with every cell set to 0.

    Each row is a separate list object, so writing to one row never
    changes another.
    """
    grid = []
    for _ in range(row):
        grid.append([0] * col)
    return grid

def create_random_propensities():
    """Draw a random starting propensity for each of the nine strategies.

    Returns:
        A dict keyed by strategy (the integers 1 to 9). Each value is a
        non-negative weight; the nine weights are rescaled to sum to 10.
    """
    draws = [random.uniform(0, 1) for _ in range(9)]
    total = np.sum(draws)
    # Strategy k takes the k-th draw, rescaled so all nine sum to 10.
    return {
        strategy: (draw / total) * 10
        for strategy, draw in enumerate(draws, start=1)
    }

class Proposer:
    """Proposer side of an agent: learns which demand (1 to 9) to make.

    ``propensities`` maps each strategy to a weight; ``probabilites`` holds
    the normalised weights and ``demand`` the strategy currently played.
    """

    def __init__(self):
        # strategy (1..9) -> propensity weight
        self.propensities = create_random_propensities()
        # probabilites[k] is the chance of playing strategy k + 1;
        # empty until initialize() (or update_probablities()) fills it.
        self.probabilites = []
        self.demand = 0 # the amount the proposer demands for themselves

    def pick_strat(self, n_trials):
        """Sample a strategy, an integer in the interval [1, 9].

        Draws ``n_trials`` multinomial trials over ``self.probabilites`` and
        returns the strategy drawn most often; ties are broken at random.
        """
        results = multinomial(n_trials, self.probabilites)
        winners, = np.where(results == results.max())
        if len(winners) > 1:
            return choice(winners) + 1
        return winners[0] + 1

    def calculate_probability(self, dict_data, index, total_sum):
        """Return the weight stored under ``index`` as a share of ``total_sum``."""
        return dict_data[index]/total_sum

    def calculate_sum(self, dict_data):
        """Return the sum of all values in ``dict_data``."""
        return sum(dict_data.values())

    def _normalised_propensities(self):
        """Return the nine propensities divided by their sum, in strategy order."""
        total = self.calculate_sum(self.propensities)
        return [
            self.calculate_probability(self.propensities, strategy, total)
            for strategy in range(1, 10)
        ]

    def initialize(self):
        """Compute the probabilities from the propensities and pick a demand.

        The probability list is rebuilt rather than appended to, so calling
        this more than once cannot leave more than nine entries behind.
        """
        self.probabilites = self._normalised_propensities()
        self.demand = self.pick_strat(1)

    def update_strategy(self):
        """Re-draw the demand from the current probabilities."""
        self.demand = self.pick_strat(1)

    def update_probablities(self):
        """Decay every propensity by ``1 - mew`` and renormalise the probabilities."""
        for strategy in range(1, 10):
            # decay stops the sum of propensities from growing without bound
            self.propensities[strategy] *= 1 - mew
        self.probabilites = self._normalised_propensities()

    def update(self):
        """Refresh the probabilities, then draw a new demand from them."""
        self.update_probablities()
        self.update_strategy()

class Responder:
    """Responder side of an agent: learns the highest demand (1 to 9) to accept.

    The methods mirror those of the Proposer class; only the attribute that
    stores the chosen strategy (``max_thresh``) differs.
    """

    def __init__(self):
        # strategy (1..9) -> propensity weight
        self.propensities = create_random_propensities()
        # probabilites[k] is the chance of playing strategy k + 1;
        # empty until initialize() (or update_probablities()) fills it.
        self.probabilites = []
        self.max_thresh = 0 # the maximum demand they are willing to accept

    def pick_strat(self, n_trials):
        """Sample a strategy, an integer in the interval [1, 9].

        Draws ``n_trials`` multinomial trials over ``self.probabilites`` and
        returns the strategy drawn most often; ties are broken at random.
        """
        results = multinomial(n_trials, self.probabilites)
        winners, = np.where(results == results.max())
        if len(winners) > 1:
            return choice(winners) + 1
        return winners[0] + 1

    def calculate_probability(self, dict_data, index, total_sum):
        """Return the weight stored under ``index`` as a share of ``total_sum``."""
        return dict_data[index]/total_sum

    def calculate_sum(self, dict_data):
        """Return the sum of all values in ``dict_data``."""
        return sum(dict_data.values())

    def _normalised_propensities(self):
        """Return the nine propensities divided by their sum, in strategy order."""
        total = self.calculate_sum(self.propensities)
        return [
            self.calculate_probability(self.propensities, strategy, total)
            for strategy in range(1, 10)
        ]

    def initialize(self):
        """Compute the probabilities from the propensities and pick a threshold.

        The probability list is rebuilt rather than appended to, so calling
        this more than once cannot leave more than nine entries behind.
        """
        self.probabilites = self._normalised_propensities()
        self.max_thresh = self.pick_strat(1)

    def update_strategy(self):
        """Re-draw the acceptance threshold from the current probabilities."""
        self.max_thresh = self.pick_strat(1)

    def update_probablities(self):
        """Decay every propensity by ``1 - mew`` and renormalise the probabilities."""
        for strategy in range(1, 10):
            # stops sum of propensities from growing without bound
            self.propensities[strategy] *= 1 - mew
        self.probabilites = self._normalised_propensities()

    def update(self):
        """Refresh the probabilities, then draw a new threshold from them."""
        self.update_probablities()
        self.update_strategy()

class Agent:
    """A lattice player made of one Proposer and one Responder.

    Both sides are created and initialised on construction, proposer first.
    """

    def __init__(self):
        self.prop_side = Proposer()
        self.resp_side = Responder()
        for side in (self.prop_side, self.resp_side):
            side.initialize()

    def update_all(self):
        """Update the proposer side, then the responder side."""
        for side in (self.prop_side, self.resp_side):
            side.update()

class Grid:
    """Dimensions of the lattice, plus helpers to build it and find neighbours."""

    def __init__(self, rowsize, colsize):
        self.rowsize = rowsize
        self.colsize = colsize

    def make_lattice(self):
        """Return a ``rowsize`` x ``colsize`` nested list with a new Agent in every cell."""
        return [[Agent() for _ in range(self.colsize)] for _ in range(self.rowsize)]

    @staticmethod
    def von_neumann_neighbourhood(array, row, col, wrapped=True):
        """Return the set of cells directly below, above, right and left of a node.

        Args:
            array: rectangular nested list; its elements must be hashable.
            row, col: position of the node whose neighbours are wanted.
            wrapped: when true, a neighbour that would fall outside the grid
                is taken from the opposite edge instead of being skipped.

        Returns:
            A set, so cells reached more than once (possible on very small
            wrapped grids) appear only once.
        """
        last_row = len(array) - 1
        last_col = len(array[0]) - 1
        neighbours = set()

        # For each direction: use the in-bounds cell, otherwise wrap around.
        if row + 1 <= last_row:
            neighbours.add(array[row + 1][col])
        elif wrapped:
            neighbours.add(array[0][col])

        if row - 1 >= 0:
            neighbours.add(array[row - 1][col])
        elif wrapped:
            neighbours.add(array[-1][col])

        if col + 1 <= last_col:
            neighbours.add(array[row][col + 1])
        elif wrapped:
            neighbours.add(array[row][0])

        if col - 1 >= 0:
            neighbours.add(array[row][col - 1])
        elif wrapped:
            neighbours.add(array[row][-1])

        return neighbours

def get_error_term(pay, strategy):
    """Split a payoff between the played strategy and its adjacent strategies.

    Returns:
        A 3-tuple ``(own_share, adjacent_share, adjacent_index)``.
        ``adjacent_share`` is always ``error/2 * pay``. For the edge
        strategies 1 and 9 the played strategy keeps ``(1 - error/2) * pay``
        and ``adjacent_index`` is the single adjacent strategy (2 or 8). For
        any other strategy it keeps ``(1 - error) * pay`` and
        ``adjacent_index`` is 0.
    """
    half_error = error / 2
    adjacent_share = half_error * pay
    if strategy == 1:
        adjacent_index = 2
    elif strategy == 9:
        adjacent_index = 8
    else:
        return (1 - error) * pay, adjacent_share, 0
    return (1 - half_error) * pay, adjacent_share, adjacent_index

class Games:
    """Runs the lattice simulation for a fixed number of rounds.

    Args:
        n_rows, n_cols: lattice dimensions.
        n_rounds: number of time steps executed by run_game().
    """

    def __init__(self, n_rows, n_cols, n_rounds):
        self.rounds = n_rounds
        self.rows = n_rows
        self.cols = n_cols
        self.lattice = Grid(self.rows, self.cols).make_lattice()
        # agent -> (row, col), built once so a neighbour's coordinates are a
        # dict lookup instead of a scan over the whole lattice on every game.
        self._positions = {
            agent: (r, c)
            for r, lattice_row in enumerate(self.lattice)
            for c, agent in enumerate(lattice_row)
        }
        # if player on grid has updated their strat this round, set to True
        self.lookup_table = np.full((self.rows, self.cols), False, dtype=bool)

    def reset_look_tab(self):
        """Mark every player as not yet updated (clears the table in place)."""
        self.lookup_table.fill(False)

    def run_game(self):
        """Play ``self.rounds`` rounds; in each round every node plays one game.

        From the second round on, a player refreshes its probabilities and
        strategies just before it plays.
        """
        for n in range(self.rounds):
            for r in range(self.rows):
                for c in range(self.cols):
                    if n != 0:
                        self.lattice[r][c].update_all()
                        self.lookup_table[r, c] = True
                    self.play_rounds(self.lattice, r, c)
            self.reset_look_tab()

    @staticmethod
    def _reinforce(side, strategy, pay):
        """Add a positive payoff to ``strategy`` and its adjacent strategies on ``side``."""
        payoff, adjacent_payoff, index = get_error_term(pay, strategy)
        side.propensities[strategy] += payoff
        if strategy == 1 or strategy == 9:
            # edge strategies have a single adjacent strategy
            side.propensities[index] += adjacent_payoff
        else:
            side.propensities[strategy - 1] += adjacent_payoff
            side.propensities[strategy + 1] += adjacent_payoff

    def play_rounds(self, grid, row, col):
        """Play one game between the player at (row, col) and a random neighbour.

        A coin toss decides whether the player acts as proposer or responder.
        Only the player's propensities are reinforced, and only when the
        proposer's demand does not exceed the responder's threshold.

        Returns:
            0 when the demand is rejected, otherwise None.
        """
        neighbours = Grid.von_neumann_neighbourhood(grid, row, col)
        # random.sample() no longer accepts a set (TypeError on Python 3.11+),
        # so sample from a tuple of the neighbours instead.
        neighbour = random.sample(tuple(neighbours), 1)[0]
        n_row, n_col = self._positions[neighbour]
        # see if neighbour has already updated their strat
        # NOTE(review): the neighbour's own table entry is not set here, so it
        # may be updated again later in the same round - confirm this is intended.
        if not self.lookup_table[n_row, n_col]:
            neighbour.update_all()
        player = grid[row][col]
        coin_toss = randint(0, 1) # which player acts as proposer or responder in game
        if coin_toss == 1:
            demand = player.prop_side.demand
            if demand > neighbour.resp_side.max_thresh:
                return 0 # if demand > max thresh -> both get zero
            self._reinforce(player.prop_side, demand, demand)
        else:
            threshold = player.resp_side.max_thresh
            neighbour_demand = neighbour.prop_side.demand
            if neighbour_demand > threshold:
                return 0
            # the responder receives what the proposer did not demand
            self._reinforce(player.resp_side, threshold, 10 - neighbour_demand)

#pr = cProfile.Profile()
#pr.enable()

# Build a 10 x 10 lattice and simulate 2000 time steps.
my_game = Games(n_rows=10, n_cols=10, n_rounds=2000)
my_game.run_game()

#pr.disable()
#pr.print_stats(sort='time')

(补充说明:get_error_term 只是计算与获得正收益的策略相邻的那些策略应增加的倾向。例如,如果策略8成功,那么7和9的倾向也会被上调,上调的数值由该函数计算。update_probabilities 中的第一个 for 循环只是为了确保倾向的总和不会无限制地增长。)

0 个答案:

没有答案