Suppose I have an array of numbers S = [6, 2, 1, 7, 4, 3, 9, 5, 3, 1]. I want to split it into three arrays. The order of the numbers, and the number of items in each array, does not matter.
Let A1, A2 and A3 be the sub-arrays. I want to minimize the function

f(x) = ( SUM(A1) - SUM(S) / 3 )^2 / 3 +
       ( SUM(A2) - SUM(S) / 3 )^2 / 3 +
       ( SUM(A3) - SUM(S) / 3 )^2 / 3

Why do I need to solve this problem? I want to pack boxes into three columns such that the total heights of the columns do not differ too much.
My first instinct was to use a greedy approach. The result is not that bad, but it does not guarantee an optimal solution. Is there a better way?
s = [6, 2, 1, 7, 4, 3, 9, 5, 3, 1]
s = sorted(s, reverse=True)
a = [[], [], []]
sum_a = [0, 0, 0]
for x in s:
    i = sum_a.index(min(sum_a))
    sum_a[i] += x
    a[i].append(x)
print(a)
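For reference, here is a minimal sketch of the question's objective f applied to that greedy result (f_value is a hypothetical helper of mine, not part of the original code):

def f_value(parts, s):
    # f from the question: mean squared deviation of the three
    # column sums from the ideal height SUM(S) / 3
    target = sum(s) / 3.0
    return sum((sum(p) - target) ** 2 for p in parts) / 3

print(f_value(a, s))  # 0.2222... for the sample list above (hand-checked)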
Answer 0 (score: 7)
As you said you don't mind a non-optimal solution, I will reuse your initial function and add a way of finding a good starting arrangement for your initial list s.

Your initial function:
def pigeon_hole(s):
    a = [[], [], []]
    sum_a = [0, 0, 0]
    for x in s:
        i = sum_a.index(min(sum_a))
        sum_a[i] += x
        a[i].append(x)
    return list(map(sum, a))
Here is a way of finding a sensible initial ordering for your list. It works by creating rotations of the list in sorted and reverse-sorted order. The best rotation is found by minimizing the standard deviation once the list has been pigeon-holed:
import numpy as np

def rotate(l):
    l = sorted(l)
    lr = l[::-1]
    rotation = [np.roll(l, i) for i in range(len(l))] + [np.roll(lr, i) for i in range(len(l))]
    blocks = [pigeon_hole(i) for i in rotation]
    return rotation[np.argmin(np.std(blocks, axis=1))]  # the best rotation
import random
print(pigeon_hole(rotate([random.randint(0, 20) for i in range(20)])))
# Testing with some random numbers, these are the sums of the three sub-lists
>>> [64, 63, 63]
While this can be optimized further, it is quite fast, taking 0.0013 s for 20 numbers. Comparing with Mo Tao's answer using a = rotate(range(1, 30)):
# This method
a = rotate(range(1, 30))
>>> [[29, 24, 23, 18, 17, 12, 11, 6, 5], [28, 25, 22, 19, 16, 13, 10, 7, 4, 1], [27, 26, 21, 20, 15, 14, 9, 8, 3, 2]]
list(map(sum, a))
# Sums to [145, 145, 145] in 0.002s

# Mo Tao's method
>>> [[25, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1], [29, 26, 20, 19, 18, 17, 16], [28, 27, 24, 23, 22, 21]]
# Sums to [145, 145, 145] in 1.095s
This method also appears to find the optimal solution in many cases, although that probably does not hold for all cases. Testing against Mo Tao's answer 500 times, with lists of 30 numbers, and comparing whether the sub-list sums come out the same:
c = 0
for i in range(500):
    r = [random.randint(1, 10) for j in range(30)]
    res = pigeon_hole(rotate(r))
    d, e = sorted(res), sorted(tao(r))  # comparing this to the optimal solution by Mo Tao
    if all(k == kk for k, kk in zip(d, e)):
        c += 1
>>> 500 # (they do)
I thought I would provide a more optimized version of the function here:
def rotate2(l):
    # Calculate an acceptable minimum stdev of the pigeon-holed list
    if sum(l) % 3 == 0:
        std = 0
    else:
        std = np.std([0, 0, 1])
    l = sorted(l, reverse=True)
    best_rotation = None
    best_std = 100
    for i in range(len(l)):
        rotation = np.roll(l, i)
        sd = np.std(pigeon_hole(rotation))
        if sd == std:
            return rotation  # stop early once a rotation with the minimal stdev is found
        elif sd < best_std:
            best_std = sd
            best_rotation = rotation
    return best_rotation
The main change is that the search for a good ordering stops as soon as a suitable rotation is found. Also, only the reverse-sorted list is searched, which does not seem to change the results. Timing with

import timeit
print(timeit.timeit("rotate2([random.randint(1, 10) for i in range(30)])", "from __main__ import rotate2, random", number=1000) / 1000.)

shows a large speedup: on my current computer rotate takes around 1.84 ms while rotate2 takes around 0.13 ms, so about a 14x speedup. For comparison, the original implementation took around 0.99 ms on my machine.
Answer 1 (score: 4)
As I mentioned in the comments on the question, here is the straightforward dynamic programming approach. It takes less than 1 second for s = range(1, 30) and gives the optimal solution.

If you know memoization, I think the code is self-explanatory.
s = range(1, 30)
# s = [6, 2, 1, 7, 4, 3, 9, 5, 3, 1]
n = len(s)
memory = {}
best_f = pow(sum(s), 3)
best_state = None

def search(state, pre_state):
    global memory, best_f, best_state
    s1, s2, s3, i = state
    f = s1 * s1 + s2 * s2 + s3 * s3
    if state in memory or f >= best_f:
        return
    memory[state] = pre_state
    if i == n:
        best_f = f
        best_state = state
    else:
        search((s1 + s[i], s2, s3, i + 1), state)
        search((s1, s2 + s[i], s3, i + 1), state)
        search((s1, s2, s3 + s[i], i + 1), state)

search((0, 0, 0, 0), None)

# reconstruct the three sub-lists by walking back through the memoized states
a = [[], [], []]
state = best_state
while state[3] > 0:
    pre_state = memory[state]
    for j in range(3):
        if state[j] != pre_state[j]:
            a[j].append(s[pre_state[3]])
    state = pre_state

print(a)
print(best_f, best_state, list(map(sum, a)))
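As a side note, the same state-space idea can be sketched compactly with functools.lru_cache; this is my own illustration, not part of this answer, and it returns only the optimal value of s1^2 + s2^2 + s3^2 rather than the partition itself (minimizing that sum is equivalent to minimizing f, because SUM(S) is fixed):

from functools import lru_cache

s = [6, 2, 1, 7, 4, 3, 9, 5, 3, 1]
n, total = len(s), sum(s)

@lru_cache(maxsize=None)
def best(i, s1, s2):
    # minimal s1^2 + s2^2 + s3^2 over all assignments of s[i:],
    # given the current sums s1 and s2 (s3 is implied by the total)
    if i == n:
        s3 = total - s1 - s2
        return s1 * s1 + s2 * s2 + s3 * s3
    x = s[i]
    return min(best(i + 1, s1 + x, s2),  # put s[i] into part 1
               best(i + 1, s1, s2 + x),  # put s[i] into part 2
               best(i + 1, s1, s2))      # put s[i] into part 3 (implicit)

print(best(0, 0, 0))  # 561, i.e. sums (14, 14, 13), which corresponds to f = 2/9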
Answer 2 (score: 3)
We can study how stable the solution you found is under moves of elements between the lists. My code is below. Whenever a move makes the objective function better, we keep the new lists and hope that another move will improve the function again. As a starting point we can take your greedy solution. The end result will be a local minimum.
from copy import deepcopy

s = [6, 2, 1, 7, 4, 3, 9, 5, 3, 1]
s = sorted(s, reverse=True)
a = [[], [], []]
sum_a = [0, 0, 0]
for x in s:
    i = sum_a.index(min(sum_a))
    sum_a[i] += x
    a[i].append(x)

def f(a):
    return ((sum(a[0]) - sum(s) / 3.0)**2 + (sum(a[1]) - sum(s) / 3.0)**2 + (sum(a[2]) - sum(s) / 3.0)**2) / 3

fa = f(a)
while True:
    modified = False

    # placing
    for i_from, i_to in [(0, 1), (0, 2), (1, 0), (1, 2), (2, 0), (2, 1)]:
        for j in range(len(a[i_from])):
            a_new = deepcopy(a)
            a_new[i_to].append(a_new[i_from][j])
            del a_new[i_from][j]
            fa_new = f(a_new)
            if fa_new < fa:
                a = a_new
                fa = fa_new
                modified = True
                break
        if modified:
            break

    # replacing
    for i_from, i_to in [(0, 1), (0, 2), (1, 0), (1, 2), (2, 0), (2, 1)]:
        for j_from in range(len(a[i_from])):
            for j_to in range(len(a[i_to])):
                a_new = deepcopy(a)
                a_new[i_to].append(a_new[i_from][j_from])
                a_new[i_from].append(a_new[i_to][j_to])
                del a_new[i_from][j_from]
                del a_new[i_to][j_to]
                fa_new = f(a_new)
                if fa_new < fa:
                    a = a_new
                    fa = fa_new
                    modified = True
                    break
            if modified:
                break
        if modified:
            break

    if not modified:
        break

print(a, f(a))  # [[9, 3, 1, 1], [7, 4, 3], [6, 5, 2]] 0.2222222222222222
Interestingly, this approach works well even if we start from an arbitrary a:
from copy import deepcopy

s = [6, 2, 1, 7, 4, 3, 9, 5, 3, 1]

def f(a):
    return ((sum(a[0]) - sum(s) / 3.0)**2 + (sum(a[1]) - sum(s) / 3.0)**2 + (sum(a[2]) - sum(s) / 3.0)**2) / 3

a = [s, [], []]
fa = f(a)
while True:
    modified = False

    # placing
    for i_from, i_to in [(0, 1), (0, 2), (1, 0), (1, 2), (2, 0), (2, 1)]:
        for j in range(len(a[i_from])):
            a_new = deepcopy(a)
            a_new[i_to].append(a_new[i_from][j])
            del a_new[i_from][j]
            fa_new = f(a_new)
            if fa_new < fa:
                a = a_new
                fa = fa_new
                modified = True
                break
        if modified:
            break

    # replacing
    for i_from, i_to in [(0, 1), (0, 2), (1, 0), (1, 2), (2, 0), (2, 1)]:
        for j_from in range(len(a[i_from])):
            for j_to in range(len(a[i_to])):
                a_new = deepcopy(a)
                a_new[i_to].append(a_new[i_from][j_from])
                a_new[i_from].append(a_new[i_to][j_to])
                del a_new[i_from][j_from]
                del a_new[i_to][j_to]
                fa_new = f(a_new)
                if fa_new < fa:
                    a = a_new
                    fa = fa_new
                    modified = True
                    break
            if modified:
                break
        if modified:
            break

    if not modified:
        break

print(a, f(a))  # [[3, 9, 2], [6, 7], [4, 3, 1, 1, 5]] 0.2222222222222222
It gives a different partition of the list, but the same value of the objective function.
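Since hill climbing like this can get stuck in a local minimum, a natural follow-up (my addition, not part of the original answer) is to restart from several random partitions and keep the best result. A minimal self-contained sketch, using only the "placing" move from above for brevity:

import random
from copy import deepcopy

s = [6, 2, 1, 7, 4, 3, 9, 5, 3, 1]

def f(a):
    target = sum(s) / 3.0
    return sum((sum(part) - target) ** 2 for part in a) / 3

def improve(a):
    # one pass of the "placing" move: relocate a single element
    # to another list if that lowers f; report whether it did
    for i_from in range(3):
        for i_to in range(3):
            if i_from == i_to:
                continue
            for j in range(len(a[i_from])):
                a_new = deepcopy(a)
                a_new[i_to].append(a_new[i_from].pop(j))
                if f(a_new) < f(a):
                    a[:] = a_new
                    return True
    return False

best_a, best_fa = None, float('inf')
for _ in range(20):  # random restarts
    a = [[], [], []]
    for x in s:
        random.choice(a).append(x)  # random initial partition
    while improve(a):
        pass
    if f(a) < best_fa:
        best_a, best_fa = deepcopy(a), f(a)

print(best_a, best_fa)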
Answer 3 (score: 3)
I have to say your greedy function does produce quite good results, but it tends to become very slow if the input size is large, say over 100.
However, you have said that your input size is fixed in the range 10-30, so the greedy solution is actually quite good. Instead of becoming too greedy right from the start, I propose being a bit lazy at first and becoming greedy only at the end.
Here is the modified function, lazy:
def lazy(s):
    k = (len(s) // 3 - 2) * 3  # slice limit
    s.sort(reverse=True)
    # perform limited extended slicing
    a = [s[1:k:3], s[2:k:3], s[:k:3]]
    sum_a = list(map(sum, a))
    for x in s[k:]:
        i = sum_a.index(min(sum_a))
        sum_a[i] += x
        a[i].append(x)
    return a
What it does is first sort the input in descending order and fill items into the three sub-lists one by one until roughly 6 items are left. (You can change this limit and test, but for sizes 10-30 I found this works best.) After that it simply continues with the greedy approach. On average, this method takes less time than the greedy solution and is more accurate.
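For example, on the question's list the function gives columns of nearly equal height (outputs traced by hand, so treat them as illustrative):

s = [6, 2, 1, 7, 4, 3, 9, 5, 3, 1]
a = lazy(s)
print(a)                  # [[7, 4, 3], [6, 5, 2, 1], [9, 3, 1]]
print(list(map(sum, a)))  # [14, 14, 13]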
Below are line plots of list size versus time, and of list size versus accuracy (the interactive Plotly HTML files are linked at the end of this answer). Accuracy is the standard deviation of the sums of the final sub-lists from the mean of the original list, because you want the columns to stack at almost equal heights, not at the (mean of the original list) height.
Also, the item values range between 3 and 15, so that the sums are around 100-150, as you mentioned.
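Concretely, for column sums [14, 14, 13] on the question's list, where sum(S) // 3 == 41 // 3 == 13, the metric works out as follows (my worked example):

import math

sums = [14, 14, 13]
target = 41 // 3  # 13
print(round(math.sqrt(sum((target - y) ** 2 for y in sums) / 3), 4))  # 0.8165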
These are the test functions:
import math
import time
import random

import plotly
import plotly.graph_objs as go

r = random.randint  # shorthand used in the tests (defined in the linked full file)
# `pigeon` is the plain greedy splitter from the question (returning the three
# sub-lists); `lazy` is the function above; both come from the full file

def test_accuracy():
    rsd = lambda s: round(math.sqrt(sum([(sum(s) // 3 - y)**2 for y in s]) / 3), 4)
    sm = lambda s: list(map(sum, s))
    N = [i for i in range(10, 30)]
    ST = []
    MT = []
    for n in N:
        case = [r(3, 15) for x in range(n)]
        ST.append(rsd(sm(lazy(case))))
        MT.append(rsd(sm(pigeon(case))))
    strace = go.Scatter(x=N, y=ST, name='Lazy pigeon')
    mtrace = go.Scatter(x=N, y=MT, name='Pigeon')
    data = [strace, mtrace]
    layout = go.Layout(
        title='Uniform distribution in 3 sublists',
        xaxis=dict(title='List size'),
        yaxis=dict(title='Accuracy - Standard deviation'))
    fig = go.Figure(data=data, layout=layout)
    plotly.offline.plot(fig, filename='N vs A2.html')
def test_timings():
    N = [i for i in range(10, 30)]
    ST = []
    MT = []
    for n in N:
        case = [r(3, 15) for x in range(n)]
        start = time.perf_counter()  # time.clock() originally; removed in Python 3.8
        lazy(case)
        ST.append(time.perf_counter() - start)
        start = time.perf_counter()
        pigeon(case)
        MT.append(time.perf_counter() - start)
    strace = go.Scatter(x=N, y=ST, name='Lazy pigeon')
    mtrace = go.Scatter(x=N, y=MT, name='Pigeon')
    data = [strace, mtrace]
    layout = go.Layout(
        title='Uniform distribution in 3 sublists',
        xaxis=dict(title='List size'),
        yaxis=dict(title='Time (seconds)'))
    fig = go.Figure(data=data, layout=layout)
    plotly.offline.plot(fig, filename='N vs T2.html')
Here is the complete file.
Edit:
I tested the accuracy of kezzos' answer, and it performed really well; the deviation was consistently smaller than 0.8.

Average standard deviation over 100 runs:

Lazy Pigeon     Pigeon          Rotation
1.10668795      1.1573573       0.54776425
In terms of speed, the rotation function is an order of magnitude slower. Still, 10^-3 s is fine unless you need to run the function repeatedly.

Lazy Pigeon     Pigeon          Rotation
5.384013e-05    5.930269e-05    0.004980
Here is a bar chart comparing the accuracy of all three functions (linked below).

To sum it up: kezzos' solution is the best if you are fine with its speed.

Plotly HTML files - versus time, versus accuracy, and the bar chart.
Answer 4 (score: 1)
Here is my implementation of Korf's Sequential Number Partitioning (SNP) [1], but it only uses plain Karmarkar-Karp rather than Complete Karmarkar-Karp for the two-way partitioning (I've included an unused, somewhat unsatisfying version of CKK - perhaps someone has a suggestion to make it more efficient?).

On the first subset it places lower and upper bounds; see the referenced article. I am sure more efficient implementations could be made. Modify MAX_ITERATIONS to trade longer waiting time for better results :)

By the way, the function KK3 (an extension of Karmarkar-Karp to three-way partitioning, used here to compute the first lower bound) seems quite good on its own.
from random import randint
from collections import Counter
from bisect import insort
from time import time

def KK3(s):
    # Karmarkar-Karp extended to three-way partitioning; each entry keeps the
    # differences of the three subset sums from the smallest, plus the subsets
    s = list(map(lambda x: (x, 0, 0, [], [], [x]), sorted(s)))
    while len(s) > 1:
        large = s.pop()
        small = s.pop()
        combined = sorted([large[0] + small[2], large[1] + small[1], large[2] + small[0]], reverse=True)
        combined = list(map(lambda x: x - combined[2], combined))
        combined = combined + sorted((large[3] + small[5], large[4] + small[4], large[5] + small[3]), key=sum)
        insort(s, tuple(combined))
    return s

#s = [6, 2, 1, 7, 4, 3, 9, 5, 3, 1]
s = [randint(0, 100) for r in range(0, 30)]

# global variables
s = sorted(s, reverse=True)
sum_s = sum(s)
upper_bound = sum_s // 3
lower_bound = sum(KK3(s)[0][3])
best = (sum_s, ([], [], []))
iterations = 0
MAX_ITERATIONS = 10000

def partition(i, accum):
    global lower_bound, best, iterations
    sum_accum = sum(accum)
    if sum_accum > upper_bound or iterations > MAX_ITERATIONS:
        return
    iterations = iterations + 1
    if sum_accum >= lower_bound:
        rest = KK(diff(s, accum))[0]
        new_diff = sum(rest[1]) - sum_accum
        if new_diff < best[0]:
            best = (new_diff, (accum, rest[1], rest[2]))
            lower_bound = (sum_s - 2 * new_diff) // 3
            print("lower_bound: " + str(lower_bound))
    if not best[0] in [0, 1] and i < len(s) - 1 and sum(accum) + sum(s[i + 1:]) > lower_bound:
        _accum = accum[:]
        partition(i + 1, _accum + [s[i]])
        partition(i + 1, accum)

def diff(l1, l2):
    return list((Counter(l1) - Counter(l2)).elements())

def KK(s):
    # plain two-way Karmarkar-Karp differencing
    s = list(map(lambda x: (x, [x], []), sorted(s)))
    while len(s) > 1:
        large = s.pop()
        small = s.pop()
        insort(s, (large[0] - small[0], large[1] + small[2], large[2] + small[1]))
    return s

print(s)
start_time = time()
partition(0, [])
print(best)
print("iterations: " + str(iterations))
print("--- %s seconds ---" % (time() - start_time))
[1] Richard E. Korf, "Multi-Way Number Partitioning", Computer Science Department, University of California, Los Angeles; aaai.org/ocs/index.php/IJCAI/IJCAI-09/paper/viewFile/625/705