我有以下代码:
def min_dist(aux):
""" Function to obtain the minimum distance in the 2 matching problem.
Input: the dataframe given to us.
Output: minimum distance and the matches (this will actually be written in a csv).
"""
# Let's create an array that stores the distances.
dist = []
# We will also create an array with passengers compared so we don't double our information.
compared = []
# An array for us to store the overlaps
gain = []
# Store which overlaps
gain_loc =[]
# Finally one to obtain the tuple.
matches = []
test1 = 0
test2 = 0
for p1 in range(0,len(aux)):
""" Since the maximum of passengers is 2, there are 4 different possible outcomes: it could be:
1) start passenger 1, start passenger 2, end passanger 1, end passenger 2
2) start passenger 1, start passenger 2, end passanger 2, end passenger 1
3) start passenger 2, start passenger 1, end passanger 1, end passenger 2
4) start passenger 2, start passenger 1, end passanger 2, end passenger 1
"""
for p2 in range(0,len(aux)):
if p1 != p2: # We evidently don't want to pickup the same passenger two times.
if [p2,p1] not in compared: # [p1,p2] can't be repeated because of the construction of compared and the for loop.
alone = M_dist(p1,p1,"start","end",aux) # We also want to check it alone, since there could be unmached rides.
compared.append([p1])
alone2 = M_dist(p2,p2,"start","end",aux)
ind_drives = alone + alone2 # We get the independent distance for both rides.
# Let's get now the 4 outcomes and take the minimum one.
out1 = M_dist(p1,p2,"start","start",aux) + M_dist(p2,p1,"start","end",aux) + M_dist(p1,p2,"end","end",aux)
out2 = M_dist(p1,p2,"start","start",aux) + M_dist(p2,p2,"start","end",aux) + M_dist(p2,p1,"end","end",aux)
out3 = M_dist(p2,p1,"start","start",aux) + M_dist(p1,p1,"start","end",aux) + M_dist(p1,p2,"end","end",aux)
out4 = M_dist(p2,p1,"start","start",aux) + M_dist(p1,p2,"start","end",aux) + M_dist(p2,p1,"end","end",aux)
together = [out1,out2,out3,out4] # Lets put them together in order to get the minimum
minimum = np.min(together) # We could merge the two lines but I believe it makes it easier to see if we separate them.
# Save the fact that the comparition was done.
compared.append([p1,p2])
overlap = ind_drives - minimum
# Now we will get the overlap between the two rides. We do not care about negative overlaps since we can just get two rides instead.
if overlap >= 0: # We made it equal because it there is no loss, we can still save energy :D !
dist.append(minimum)
# We will store them in order of start.
if minimum == out1:
match = [p1,p2,p1,p2]
matches.append(match)
gain.append(overlap) # How much overlap we get between the two rides.
gain_loc.append(match)
elif minimum == out2:
match = [p1,p2,p1,p2]
matches.append(match)
gain.append(overlap) # How much overlap we get between the two rides.
gain_loc.append(match)
elif minimum == out3:
match = [p1,p2,p1,p2]
matches.append(match)
gain.append(overlap) # How much overlap we get between the two rides.
gain_loc.append(match)
elif minimum == out4:
match = [p1,p2,p1,p2]
matches.append(match)
gain.append(overlap) # How much overlap we get between the two rides.
gain_loc.append(match)
# In case we actually get negative overlaps, we will save the individual values.
else:
# We do not want them repeated, so we check if they are already there.
if alone not in dist and alone2 not in dist:
dist.append(alone)
dist.append(alone2)
matches.append([p1,p1])
matches.append([p2,p2])
elif alone not in dist:
dist.append(alone)
matches.append([p1,p1])
elif alone2 not in dist:
dist.append(alone2)
matches.append([p2,p2])
# Now we have the the overlaps, the tuples that contain said overlaps and all the distances.
# We want now to take the aproach said in the notebook and start with the biggest overlap and then go down.
f_match = []
indexes = np.argsort(gain)[::-1]
for index in indexes:
if not bool(set(np.unique(f_match)) & set(gain_loc[index])):
f_match.append(gain_loc[index])
used = []
# Get unique passengers already used
for j in np.unique(f_match):
for i in np.unique(j):
used.append(i)
all_p = list(range(len(aux)))
for x in np.unique(used): all_p.remove(x) # We take all the passengers that do not have someone to share a ride
for p in all_p: f_match.append([p,p]) # We add them as un matched
# We now have the final order, we just want to get the minimum distance.
dist_aux = []
for f in f_match:
for i in range(0,len(matches)):
if f == matches[i]:
dist_aux.append(dist[i])
f_dist = np.sum(dist_aux)
print('The minimum distance is: ', f_dist, '\n')
return None
其中的函数是:
def M_dist(p1,p2,order1,order2,requests):
""" Function to calculate manhattan distance between 1 or 2 passangers based on lattitude and logitude.
We will used the formula M(p1,p2) = 84.2|p1(lat) - p2(lat)| + 111.2|p1(lng) - p2(lng)|.
The input of our function will be 2 ride id's and if the passangers will picked or left (start,end)
Our output is the distance. """
if order1 == "start":
if order2 == "start":
# We will take the lattitud difference.
lat = requests[p1][1] - requests[p2][1]
# And the longitude
lng = requests[p1][2] - requests[p2][2]
if order2 == "end":
# We will take the lattitud difference.
lat = requests[p1][1] - requests[p2][3]
# And the longitude
lng = requests[p1][2] - requests[p2][4]
if order1 == "end":
if order2 == "end":
# We will take the lattitud difference.
lat = requests[p1][3] - requests[p2][3]
# And the longitude
lng = requests[p1][4] - requests[p2][4]
# We will use the values obtained to get the manhattan distance.
dist = 84.2*np.abs(lat) + 111.2*np.abs(lng)
return float(dist) # We want it as a float.
我运行了100个元素的代码,花了0.008秒,我试图运行200个元素,时间是8秒。它增长的数量是不可思议的,应该是O(n ^ 2),而且看起来并不像它。
但是,当尝试使用事件探查器获取信息时,有90%的时间有一个呼叫。
234757 function calls in 10.308 seconds
订购者:标准名称
ncalls tottime percall cumtime percall filename:lineno(function)
1 8.726 8.726 10.306 10.306 <ipython-input-41-436df7df7549>:1(min_dist)
我不知道它是什么,更不知道如何提高它的效率。