惊人的挑战产生全面的清单

时间:2014-05-25 20:30:22

标签: python list set unique

我正面临着Python的惊人挑战。

我是一名物理学家,在光学界面上生成一系列图层模拟。模拟的细节并不是特别重要,但重要的是我生成了所有可能的情况 - 厚度和层次顺序范围内的不同材料。

我一直在编写代码来生成一个全面而独特的列表,但我却想知道计算甚至相对简单的系统需要多长时间!当然,Python和一台合理的计算机应该可以在没有过多压力的情况下处理建议将不胜感激。

谢谢

from itertools import permutations, combinations_with_replacement

def join_adjacent_repeated_materials(potential_structure):
    """
    Self-explanitory...
    """
    #print potential_structure

    new_layers = [] # List to hold re-cast structure
    for layer in potential_structure:
        if len(new_layers) > 0: # if not the first item in the list of layers
            last_layer=new_layers[-1] # last element of existing layer list
            if layer[0] == last_layer[0]: # true is the two layers are the same material
                combined_layer = (layer[0], layer[1] + last_layer[1])
                new_layers[len(new_layers)-1] = combined_layer
            else: # adjcent layers are different material so no comibantion is possible
                new_layers.append(layer)
        else: # for the first layer
            new_layers.append(layer)

    return tuple(new_layers)

def calculate_unique_structure_lengths(thicknesses, materials, maximum_number_of_layers,\
                                       maximum_individual_layer_thicknesses, \
                                       maximum_total_material_thicknesses):
    """
    Create a set on all possible multilayer combinations.

    thicknesses : if this contains '0' the total number of layers will vary
                  from 0 to maximum_number_of_layers, otherwise, the
                  number total number layers will always be maximum_number_of_layers
                  e.g. arange(0 , 100, 5)

    materials : list of materials used
                e.g. ['Metal', 'Dielectric']

    maximum_number_of_layers : pretty self-explanitory...
                               e.g. 5

    maximum_individual_layer_thicknesses : filters the created the multilayer structures
                                           preventing the inclusion layers that are too thick
                                           - this is important after the joining of
                                           adjacent materials
                                           e.g. (('Metal',30),('Dielectric',20))

    maximum_total_material_thicknesses : similar to the above but filters structures where the total
                                         amount of a particular material is exceeded
                                         e.g. (('Metal',50),('Dielectric',100))


    """
    # generate all possible thickness combinations and material combinations
    all_possible_thickness_sets = set(permutations(combinations_with_replacement(thicknesses, maximum_number_of_layers)))
    all_possible_layer_material_orders = set(permutations(combinations_with_replacement(materials, maximum_number_of_layers)))


    first_set = set() # Create set object (list of unique elements, no repeats)
    for layer_material_order in all_possible_layer_material_orders:
        for layer_thickness_set in all_possible_thickness_sets:
            potential_structure = [] # list to hold this structure
            for layer, thickness in zip(layer_material_order[0], layer_thickness_set[0]): # combine the layer thickness with its material
                if thickness != 0: # layers of zero thickness are not added to potential_structure
                    potential_structure.append((layer, thickness))
            first_set.add(tuple(potential_structure)) # add this potential_structure to the first_set set

    #print('first_set')
    #for struct in first_set:
    #    print struct

    ## join adjacent repeated materials
    second_set = set() # create new set
    for potential_structure in first_set:
        second_set.add(join_adjacent_repeated_materials(potential_structure))

    ## remove structures where a layer is too thick
    third_set = set()
    for potential_structure in second_set: # check all the structures in the set
        conditions_satisfied=True # default
        for max_condition in maximum_individual_layer_thicknesses: # check this structure using each condition
            for layer in potential_structure: # examine each layer
                if layer[0] == max_condition[0]: # match condition with material
                    if layer[1] > max_condition[1]: # test thickness condition
                        conditions_satisfied=False
        if conditions_satisfied:
            third_set.add(potential_structure)

    ##remove structures that contain too much of a certain material
    fourth_set = set()
    for potential_structure in second_set: # check all the structures in the set
        conditions_satisfied=True # default
        for max_condition in maximum_total_material_thicknesses: # check this structure using each condition
            amount_of_material_in_this_structure = 0 # initialise a counter
            for layer in potential_structure: # examine each layer
                if layer[0] == max_condition[0]: # match condition with material
                    amount_of_material_in_this_structure += layer[1]
                    if amount_of_material_in_this_structure > max_condition[1]: # test thickness condition
                        conditions_satisfied=False
        if conditions_satisfied:
            fourth_set.add(potential_structure)

    return fourth_set

thicknesses = [0,1,2]
materials = ('A', 'B') # Tuple cannot be accidentally appended to later
maximum_number_of_layers = 3
maximum_individual_layer_thicknesses=(('A',30),('B',20))
maximum_total_material_thicknesses=(('A',20),('B',15))

calculate_unique_structure_lengths(thicknesses, materials, maximum_number_of_layers,\
                                   maximum_individual_layer_thicknesses = maximum_individual_layer_thicknesses, \
                                   maximum_total_material_thicknesses = maximum_total_material_thicknesses)

3 个答案:

答案 0 :(得分:2)

all_possible_thickness_sets = set(permutations(combinations_with_replacement(thicknesses, maximum_number_of_layers)))
all_possible_layer_material_orders = set(permutations(combinations_with_replacement(materials, maximum_number_of_layers)))

神圣的废话!这些套装将是巨大的!让我们举个例子。如果thicknesses中包含6个内容且maximum_number_of_layers为3,那么第一个集合中将包含大约2个五分之一的内容。你为什么做这个?如果这些确实是您想要使用的集合,那么您将需要找到一个不需要构建这些集合的算法,因为它永远不会发生。我怀疑这些不是你想要的套装;也许你想要itertools.product

all_possible_thickness_sets = set(product(thicknesses, repeat=maximum_number_of_layers))

以下是itertools.product所做的一个例子:

>>> for x in product([1, 2, 3], repeat=2):
...     print x
...
(1, 1)
(1, 2)
(1, 3)
(2, 1)
(2, 2)
(2, 3)
(3, 1)
(3, 2)
(3, 3)

这看起来像你需要的吗?

答案 1 :(得分:0)

因此,您经常做的一件事就是向set添加内容。如果你看一下Python documentation中集合的运行时行为,它会在最底层,最坏情况和#34;单个行为可能会花费相当长的时间,具体取决于容器的历史记录"。我认为如果添加大量元素,内存重新分配可能会让你感到困惑,因为Python无法知道启动时要保留多少内存。

我看得越多,我就越认为你保留的内存比你需要的多得多。例如,third_set甚至无法使用。如果您只是直接致电second_setfirst_set可以替换为join_adjacent_materials。如果我正确阅读,即使first_set可能会消失,您也可以在构建fourth_set时创建候选人。

当然,如果将所有内容放入一组嵌套循环中,代码可能会变得不那么可读。但是,有一些方法可以构建代码,只为了可读性而不创建不必要的对象 - 例如,您可以创建一个生成候选项的函数,并通过yield返回每个结果。

答案 2 :(得分:0)

FWIW我检测了您的code以启用性能分析。结果如下:

输出:

Sun May 25 16:06:31 2014    surprising-challenge-generating-comprehensive-python-list.stats

         348,365,046 function calls in 1,538.413 seconds

   Ordered by: cumulative time, internal time

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
        1 1052.933 1052.933 1538.413 1538.413 surprising-challenge-generating-comprehensive-python-list.py:34(calculate_unique_structure_lengths)
 87091200  261.764    0.000  261.764    0.000 {zip}
 87091274  130.492    0.000  130.492    0.000 {method 'add' of 'set' objects}
174182440   93.223    0.000   93.223    0.000 {method 'append' of 'list' objects}
       30    0.000    0.000    0.000    0.000 surprising-challenge-generating-comprehensive-python-list.py:14(join_adjacent_repeated_materials)
      100    0.000    0.000    0.000    0.000 {len}
        1    0.000    0.000    0.000    0.000 {method 'disable' of '_lsprof.Profiler' objects}


为了获得代码花费时间的更细粒度的图片,我在代码的几乎逐字副本上使用line_profiler模块,并为每个函数获得以下结果:

> python "C:\Python27\Scripts\kernprof.py" -l -v surprising-challenge-generating-comprehensive-python-list.py
Wrote profile results to example.py.lprof
Timer unit: 3.2079e-07 s

File: surprising-challenge-generating-comprehensive-python-list.py
Function: join_adjacent_repeated_materials at line 3
Total time: 0.000805183 s

Line #      Hits         Time  Per Hit   % Time  Line Contents
==============================================================
     3                                           @profile
     4                                           def join_adjacent_repeated_materials(potential_structure):
     5                                               """
     6                                               Self-explanitory...
     7                                               """
     8                                               #print potential_structure
     9
    10        30          175      5.8      7.0      new_layers = [] # List to hold re-cast structure
    11       100          544      5.4     21.7      for layer in potential_structure:
    12        70          416      5.9     16.6          if len(new_layers) > 0: # if not the first item in the list of layers
    13        41          221      5.4      8.8              last_layer=new_layers[-1] # last element of existing layer list
    14        41          248      6.0      9.9              if layer[0] == last_layer[0]: # true is the two layers are the same material
    15        30          195      6.5      7.8                  combined_layer = (layer[0], layer[1] + last_layer[1])
    16        30          203      6.8      8.1                  new_layers[len(new_layers)-1] = combined_layer
    17                                                       else: # adjcent layers are different material so no comibantion is possible
    18        11           68      6.2      2.7                  new_layers.append(layer)
    19                                                   else: # for the first layer
    20        29          219      7.6      8.7              new_layers.append(layer)
    21
    22        30          221      7.4      8.8      return tuple(new_layers)
File: surprising-challenge-generating-comprehensive-python-list.py
Function: calculate_unique_structure_lengths at line 24
Total time: 3767.94 s

Line #      Hits         Time  Per Hit   % Time  Line Contents
==============================================================
    24                                           @profile
    25                                           def calculate_unique_structure_lengths(thicknesses, materials, maximum_number_of_layers,\
    26                                                                                  maximum_individual_layer_thicknesses, \
    27                                                                                  maximum_total_material_thicknesses):
    28                                               """
    29                                               Create a set on all possible multilayer combinations.
    30
    31                                               thicknesses : if this contains '0' the total number of layers will vary
    32                                                             from 0 to maximum_number_of_layers, otherwise, the
    33                                                             number total number layers will always be maximum_number_of_layers
    34                                                             e.g. arange(0 , 100, 5)
    35
    36                                               materials : list of materials used
    37                                                           e.g. ['Metal', 'Dielectric']
    38
    39                                               maximum_number_of_layers : pretty self-explanitory...
    40                                                                          e.g. 5
    41
    42                                               maximum_individual_layer_thicknesses : filters the created the multilayer structures
    43                                                                                      preventing the inclusion layers that are too thick
    44                                                                                      - this is important after the joining of
    45                                                                                      adjacent materials
    46                                                                                      e.g. (('Metal',30),('Dielectric',20))
    47
    48                                               maximum_total_material_thicknesses : similar to the above but filters structures where the total
    49                                                                                    amount of a particular material is exceeded
    50                                                                                    e.g. (('Metal',50),('Dielectric',100))
    51
    52
    53                                               """
    54                                               # generate all possible thickness combinations and material combinations
    55         1     20305240 20305240.0    0.2      all_possible_thickness_sets = set(permutations(combinations_with_replacement(thicknesses, maximum_number_of_layers)))
    56         1          245    245.0      0.0      all_possible_layer_material_orders = set(permutations(combinations_with_replacement(materials, maximum_number_of_layers)))
    57
    58
    59         1           13     13.0      0.0      first_set = set() # Create set object (list of unique elements, no repeats)

    60        25          235      9.4      0.0      for layer_material_order in all_possible_layer_material_orders:
    61  87091224    896927052     10.3      7.6          for layer_thickness_set in all_possible_thickness_sets:
    62  87091200    920048586     10.6      7.8              potential_structure = [] # list to hold this structure
    63 348364800   4160332176     11.9     35.4              for layer, thickness in zip(layer_material_order[0], layer_thickness_set[0]): # combine the layer thickness with its material
    64 261273600   2334038439      8.9     19.9                  if thickness != 0: # layers of zero thickness are not added to potential_structure
    65 174182400   2003639625     11.5     17.1                      potential_structure.append((layer, thickness))
    66  87091200   1410517427     16.2     12.0              first_set.add(tuple(potential_structure)) # add this potential_structure to the first_set set
    67
    68                                               #print('first_set')
    69                                               #for struct in first_set:
    70                                               #    print struct
    71
    72                                               ## join adjacent repeated materials
    73         1           14     14.0      0.0      second_set = set() # create new set
    74        31          274      8.8      0.0      for potential_structure in first_set:
    75        30         5737    191.2      0.0          second_set.add(join_adjacent_repeated_materials(potential_structure))
    76
    77                                               ## remove structures where a layer is too thick
    78         1           10     10.0      0.0      third_set = set()
    79        23          171      7.4      0.0      for potential_structure in second_set: # check all the structures in the set
    80        22          164      7.5      0.0          conditions_satisfied=True # default
    81        66          472      7.2      0.0          for max_condition in maximum_individual_layer_thicknesses: # check this structure using each condition
    82       104          743      7.1      0.0              for layer in potential_structure: # examine each layer
    83        60          472      7.9      0.0                  if layer[0] == max_condition[0]: # match condition with material
    84        30          239      8.0      0.0                      if layer[1] > max_condition[1]: # test thickness condition
    85                                                                   conditions_satisfied=False
    86        22          149      6.8      0.0          if conditions_satisfied:
    87        22          203      9.2      0.0              third_set.add(potential_structure)
    88
    89                                               ##remove structures that contain too much of a certain material
    90         1           10     10.0      0.0      fourth_set = set()
    91        23          178      7.7      0.0      for potential_structure in second_set: # check all the structures in the set
    92        22          158      7.2      0.0          conditions_satisfied=True # default
    93        66          489      7.4      0.0          for max_condition in maximum_total_material_thicknesses: # check this structure using each condition
    94        44          300      6.8      0.0              amount_of_material_in_this_structure = 0 # initialise a counter
    95       104          850      8.2      0.0              for layer in potential_structure: # examine each layer
    96        60         2961     49.4      0.0                  if layer[0] == max_condition[0]: # match condition with material
    97        30          271      9.0      0.0                      amount_of_material_in_this_structure += layer[1]
    98        30          255      8.5      0.0                      if amount_of_material_in_this_structure > max_condition[1]: # test thickness condition
    99                                                                   conditions_satisfied=False
   100        22          160      7.3      0.0          if conditions_satisfied:
   101        22          259     11.8      0.0              fourth_set.add(potential_structure)
   102
   103         1           16     16.0      0.0      return fourth_set

正如您所看到的,构建first_set calculate_unique_structure_lengths()是迄今为止最耗时的步骤。