我正在尝试
In [21]: l1 = range(1,1000000)
In [22]: l2 = range(100,90000)
In [23]: l1.append(101)
In [24]: print(set([x for x in l1 if l1.count(x) - l2.count(x) == 1]))
这在我的 Python shell 中需要很长时间。一般来说,我的目标是从一个列表中减去另一个列表,同时正确处理重复项。
e.g
[1,2,2,3] - [2,3] = [1,2]
如果有人能提示如何在普通单核机器上于 500 毫秒以内完成这项工作,我将不胜感激。
答案 0 :(得分:5)
collections.Counter
不保持顺序:
from collections import Counter
# Count occurrences in the list to subtract from and in the list to subtract.
a = Counter([1, 2, 2, 3])
b = Counter([2, 3])
# Counter subtraction keeps only strictly positive counts; listing the
# resulting Counter yields its remaining keys.
difference = a - b
res = list(difference.keys())
# [1, 2]
这是有效的,因为 Counter 的 `-`(`__sub__`)方法会从输出中删除在 b 中的计数等于或大于在 a 中的计数的所有元素。
保持顺序:使用 OrderedCounter,然后手动生成列表(示例见下方代码)。
最后,如果原始序列包含非唯一值,并且您希望每个元素按相减后剩余的次数重复出现,可以使用 `list((a - b).elements())`。下面是保持顺序的示例:
from collections import Counter, OrderedDict
class OrderedCounter(Counter, OrderedDict):
    """Counter that keeps its keys in the order they were first counted."""


a = OrderedCounter([3, 2, 2, 1])
b = Counter([2, 3])
# Walk a's keys in insertion order and keep each key whose count in a
# exceeds its count in b (b[k] is 0 for keys b never saw).
res = [k for k, v in a.items() if v - b[k] > 0]
# [2, 1]
答案 1 :(得分:0)
我认为我会使用 Counter,然后将两个计数器相减的结果转换为排序列表:
sorted((Counter(l1) - Counter(l2)).elements())
答案 2 :(得分:0)
首先计算元素:
from collections import defaultdict
# Tally how often each value occurs in l1 and in l2.
count1 = defaultdict(int)
count2 = defaultdict(int)
for x in l1:
    count1[x] += 1
for x in l2:
    count2[x] += 1
# Keep the values that occur exactly once more in l1 than in l2.
# dict.items() is used because dict.iteritems() exists only in Python 2.
print([x for x, count in count1.items() if count - count2[x] == 1])
(不要忘记在最后一行将 l1 转换为集合(set))
以上代码在我的机器上需要625ms。 (没有打印结果到stdout)
答案 3 :(得分:0)
执行时间:
import time
from functools import wraps
from collections import defaultdict
from collections import Counter, OrderedDict
class OrderedCounter(Counter, OrderedDict):
    """Counter that keeps its keys in the order they were first counted."""
def tefn(fn):
    """Time one call of ``fn`` including full consumption of its result.

    The iterable returned by ``fn()`` is drained into a ``set`` so that lazy
    results (generators, iterators) are actually evaluated while the clock
    is running.

    Returns ``start - end`` in seconds, i.e. a non-positive number whose
    magnitude is the elapsed time.
    NOTE(review): the sign is inverted relative to a conventional duration;
    it is kept because the ranking in the ``__main__`` section sorts these
    values in descending order.
    """
    t = time.time()
    set(fn())
    return t - time.time()
def efn(fn):
    """Time one call of ``fn`` without touching its return value.

    Returns ``start - end`` in seconds, i.e. a non-positive number whose
    magnitude is the elapsed time.
    NOTE(review): the sign is inverted relative to a conventional duration;
    it is kept because the ranking in the ``__main__`` section sorts these
    values in descending order.
    """
    t = time.time()
    fn()
    return t - time.time()
def runTime(tp=0, count=10):
    """Build a decorator that turns a function into its own benchmark.

    Args:
        tp: if truthy the function is timed with ``tefn`` (its result is
            consumed into a set); otherwise with ``efn`` (plain call).
        count: number of timed runs to average over.

    Returns:
        A decorator. The decorated name becomes a zero-argument callable
        that prints the wrapped function and returns the mean of ``count``
        timings as reported by the selected timer.
    """
    def dec(fn):
        @wraps(fn)
        def wrap():
            print(fn)
            # Pick the timer once per benchmark run.
            res = tefn if tp else efn
            return sum(res(fn) for _ in range(count)) / count
        return wrap
    return dec
@runTime(tp=1)
def fnList():
    """Baseline from the question: list comprehension using ``count``.

    Keeps every x of the global l1 that occurs exactly once more in l1
    than in the global l2.
    """
    return [x for x in l1 if l1.count(x) - l2.count(x) == 1]
@runTime(tp=1)
def fnIter():
    """Same filter as fnList, returned lazily as an iterator.

    The timer selected by ``tp=1`` consumes the iterator, so the filtering
    work is included in the measurement.
    """
    return iter(x for x in l1 if l1.count(x) - l2.count(x) == 1)
@runTime(tp=1)
def fnYield():
    """Same filter as fnList, written as a generator function."""
    for x in l1:
        if l1.count(x) - l2.count(x) == 1:
            yield x
@runTime()
def fnCounter():
    """Subtract the globals l1 and l2 as Counters and list the remaining keys."""
    a = Counter(l1)
    b = Counter(l2)
    # Counter subtraction keeps only strictly positive counts.
    return list(a - b)
@runTime()
def fnOrderedCounter():
    """Order-preserving variant: keep l1's keys whose count exceeds l2's."""
    a = OrderedCounter(l1)
    b = Counter(l2)
    # b[k] is 0 for keys that never occur in l2.
    return [k for k, v in a.items() if v - b[k] > 0]
@runTime()
def fnDefaultdict():
    """Hand-rolled counting with defaultdict.

    Returns the values that occur exactly once more in the global l1 than
    in the global l2.
    """
    count1 = defaultdict(int)
    count2 = defaultdict(int)
    for x in l1:
        count1[x] += 1
    for x in l2:
        count2[x] += 1
    return [x for x, count in count1.items() if count - count2[x] == 1]
if __name__ == '__main__':
    # Benchmark inputs read as globals by every fn* function.
    l1 = range(1, 1000000)
    l2 = range(100, 90000)
    g = globals()
    # Run every module-level callable whose name starts with 'fn' and rank
    # the (name, averaged timing) pairs by timing, largest value first.
    # The generator expressions keep their loop variables out of globals(),
    # so g is not modified while it is being iterated.
    result = sorted(
        ((fn.__name__, fn()) for fn in (g[f] for f in g if f.startswith('fn'))),
        key=lambda r: r[1],
        reverse=True,
    )
    for e, r in enumerate(result):
        print(e, r)
OUT:
<function fnList at 0x02F17540>
<function fnIter at 0x034C8DF8>
<function fnCounter at 0x034C8F18>
<function fnDefaultdict at 0x034CB078>
<function fnYield at 0x034C8E88>
<function fnOrderedCounter at 0x034C8FA8>
0 ('fnYield', -0.8542306900024415)
1 ('fnList', -0.8605266094207764)
2 ('fnIter', -0.8655695915222168)
3 ('fnDefaultdict', -1.054802918434143)
4 ('fnCounter', -1.3413111925125123)
5 ('fnOrderedCounter', -5.433168196678162)
答案 4 :(得分:0)
我了解到合并模式更快(阅读http://openbookproject.net/thinkcs/python/english3e/list_algorithms.html#alice-in-wonderland-again):
def substract_list(origin, substract):
    """Return ``origin`` minus ``substract``, respecting duplicates.

    Both lists are walked once in a merge-style pass; each element of
    ``substract`` cancels at most one equal element of ``origin``.

    Example: the db tells us that a user bought shares 1, 2, 2, 3, 4 and
    also sold 1, 2, 2; the result says they now own 3, 4.

    Learned from:
    http://openbookproject.net/thinkcs/python/english3e/list_algorithms.html#alice-in-wonderland-again

    Args:
        origin: sorted list to subtract from; may contain duplicates.
        substract: sorted list of items to remove; may contain duplicates.

    Returns:
        A new list with the elements of ``origin`` left after the removal,
        in their original order. Neither input is modified.
    """
    result = []
    xi = 0  # cursor into origin
    yi = 0  # cursor into substract

    len_substract = len(substract)
    len_origin = len(origin)

    # Step through both lists while each still has unread values.
    while xi < len_origin and yi < len_substract:
        if origin[xi] == substract[yi]:
            # Equal values cancel each other out; advance both cursors.
            yi += 1
            xi += 1
        elif origin[xi] > substract[yi]:
            # Nothing left in origin can match this smaller value; skip it.
            yi += 1
        else:
            # origin value is smaller than anything left to subtract: keep it.
            result.append(origin[xi])
            xi += 1

    # Reached the end of substract: the rest of origin survives untouched.
    # (If origin ran out first this slice is empty.)
    result.extend(origin[xi:])
    return result
我的机器上的计数器:3-4s,合并模式:0.3s