代码运行非常缓慢的文件名未知-python profiler

时间:2018-12-24 11:27:53

标签: python performance numpy profiler

我有以下代码:

def min_dist(aux):


""" Function to obtain the minimum distance in the 2 matching problem. 
    Input: the dataframe given to us. 
    Output: minimum distance and the matches (this will actually be written in a csv). 
    """

# Let's create an array that stores the distances.
dist = []

# We will also create an array with passengers compared so we don't double our information.
compared = []

# An array for us to store the overlaps
gain = []

# Store which overlaps
gain_loc =[]

# Finally one to obtain the tuple.
matches = []

test1 = 0
test2 = 0
for p1 in range(0,len(aux)):

    """ Since the maximum of passengers is 2, there are 4 different possible outcomes: it could be:
    1) start passenger 1, start passenger 2, end passanger 1, end passenger 2
    2) start passenger 1, start passenger 2, end passanger 2, end passenger 1 
    3) start passenger 2, start passenger 1, end passanger 1, end passenger 2
    4) start passenger 2, start passenger 1, end passanger 2, end passenger 1  
    """

    for p2 in range(0,len(aux)):

        if p1 != p2: # We evidently don't want to pickup the same passenger two times.

            if [p2,p1] not in compared: # [p1,p2] can't be repeated because of the construction of compared and the for loop.

                alone = M_dist(p1,p1,"start","end",aux) # We also want to check it alone, since there could be unmached rides.
                compared.append([p1])
                alone2 = M_dist(p2,p2,"start","end",aux)
                ind_drives = alone + alone2 # We get the independent distance for both rides.

                # Let's get now the 4 outcomes and take the minimum one.
                out1 = M_dist(p1,p2,"start","start",aux) + M_dist(p2,p1,"start","end",aux) + M_dist(p1,p2,"end","end",aux)
                out2 = M_dist(p1,p2,"start","start",aux) + M_dist(p2,p2,"start","end",aux) + M_dist(p2,p1,"end","end",aux)
                out3 = M_dist(p2,p1,"start","start",aux) + M_dist(p1,p1,"start","end",aux) + M_dist(p1,p2,"end","end",aux)
                out4 = M_dist(p2,p1,"start","start",aux) + M_dist(p1,p2,"start","end",aux) + M_dist(p2,p1,"end","end",aux)

                together = [out1,out2,out3,out4] # Lets put them together in order to get the minimum
                minimum = np.min(together) # We could merge the two lines but I believe it makes it easier to see if we separate them. 

                # Save the fact that the comparition was done.
                compared.append([p1,p2])
                overlap = ind_drives - minimum
                # Now we will get the overlap between the two rides. We do not care about negative overlaps since we can just get two rides instead.
                if overlap >= 0: # We made it equal because it there is no loss, we can still save energy :D !

                    dist.append(minimum)

                    # We will store them in order of start. 
                    if minimum == out1:
                        match = [p1,p2,p1,p2]
                        matches.append(match)
                        gain.append(overlap) # How much overlap we get between the two rides.
                        gain_loc.append(match)
                    elif minimum == out2:
                        match = [p1,p2,p1,p2]
                        matches.append(match)
                        gain.append(overlap) # How much overlap we get between the two rides.
                        gain_loc.append(match)

                    elif minimum == out3:
                        match = [p1,p2,p1,p2]
                        matches.append(match)
                        gain.append(overlap) # How much overlap we get between the two rides.
                        gain_loc.append(match)

                    elif minimum == out4: 
                        match = [p1,p2,p1,p2]
                        matches.append(match)
                        gain.append(overlap) # How much overlap we get between the two rides.
                        gain_loc.append(match)

                # In case we actually get negative overlaps, we will save the individual values. 
                else:

                    # We do not want them repeated, so we check if they are already there.
                    if alone not in dist and alone2 not in dist:
                        dist.append(alone)
                        dist.append(alone2)
                        matches.append([p1,p1])
                        matches.append([p2,p2])

                    elif alone not in dist:
                        dist.append(alone)
                        matches.append([p1,p1])

                    elif alone2 not in dist:
                        dist.append(alone2)
                        matches.append([p2,p2])

    # Now we have the the overlaps, the tuples that contain said overlaps and all the distances.
    # We want now to take the aproach said in the notebook and start with the biggest overlap and then go down.

f_match = []
indexes = np.argsort(gain)[::-1]
for index in indexes:
     if not bool(set(np.unique(f_match)) & set(gain_loc[index])):
        f_match.append(gain_loc[index])

used = []
# Get unique passengers already used
for j in np.unique(f_match): 
    for i in np.unique(j):
        used.append(i)

all_p = list(range(len(aux)))
for x in np.unique(used): all_p.remove(x) # We take all the passengers that do not have someone to share a ride
for p in all_p: f_match.append([p,p]) # We add them as un matched

# We now have the final order, we just want to get the minimum distance.

dist_aux = []
for f in f_match:
    for i in range(0,len(matches)):
        if f == matches[i]:
            dist_aux.append(dist[i])
f_dist = np.sum(dist_aux)


print('The minimum distance is: ', f_dist, '\n')

return None

其中的函数是:

def M_dist(p1,p2,order1,order2,requests): 

    """  Function to calculate manhattan distance between 1 or 2 passangers based on lattitude and logitude.
         We will used the formula M(p1,p2) = 84.2|p1(lat) - p2(lat)| + 111.2|p1(lng) - p2(lng)|.
         The input of our function will be 2 ride id's and if the passangers will picked or left (start,end)
         Our output is the distance. """

    if order1 == "start":
        if order2 == "start":
            # We will take the lattitud difference.
            lat = requests[p1][1] - requests[p2][1]
            # And the longitude
            lng = requests[p1][2] - requests[p2][2]

        if order2 == "end":
            # We will take the lattitud difference.
            lat = requests[p1][1] - requests[p2][3]
            # And the longitude
            lng = requests[p1][2] - requests[p2][4]

    if order1 == "end":
        if order2 == "end":
            # We will take the lattitud difference.
            lat = requests[p1][3] - requests[p2][3]
            # And the longitude
            lng = requests[p1][4] - requests[p2][4]

    # We will use the values obtained to get the manhattan distance. 
    dist = 84.2*np.abs(lat) + 111.2*np.abs(lng)

    return float(dist) # We want it as a float.

我运行了100个元素的代码,花了0.008秒,我试图运行200个元素,时间是8秒。它增长的数量是不可思议的,应该是O(n ^ 2),而且看起来并不像它。

但是,当尝试使用事件探查器获取信息时,有90%的时间有一个呼叫。

     234757 function calls in 10.308 seconds

订购者:标准名称

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
        1    8.726    8.726   10.306   10.306 <ipython-input-41-436df7df7549>:1(min_dist)

我不知道它是什么,更不知道如何提高它的效率。

0 个答案:

没有答案