使用以下基准:
# Builds the 700-character test genome: the 7-character "gattaca" repeated 100 times.
def create_genome
  Array.new(100, "gattaca").join
end
# Benchmark body: tallies letter frequencies by exploding the string into a
# character array, grouping identical characters, and counting each group.
# Repeated 100,000 times; the per-iteration result is discarded.
def count_frequency_using_chars(sequence)
  100000.times do
    sequence.chars.group_by { |ch| ch }.map do |letter, occurrences|
      [letter, occurrences.count]
    end
  end
end
# Benchmark body: tallies the four bases by scanning the whole string with
# String#count once per base (four passes). Repeated 100,000 times.
def count_frequency_using_count(sequence)
  100000.times do
    %w[a c g t].map { |base| sequence.count(base) }
  end
end
# Drive both benchmark bodies against the same 700-character sequence.
sequence = create_genome
count_frequency_using_chars(sequence)
count_frequency_using_count(sequence)
我发现,在1.8和1.9.2的基于C的Ruby中,使用String#count(letter)
比使用Enumerable#group_by
和Array#count
排序和计算它们快大约50倍。我对此感到有些惊讶,因为String#count
方法每次迭代读取字符串四次,而后者只读取一次。
我尝试在ruby-prof和perftools.rb下运行代码,并且他们都只是表示String#chars
占用了90%的时间,没有分解90%的时间用在哪里
如果我不得不猜测为什么会有差异,我会说创建7000万个单字符字符串会很贵,但我怎么能知道呢?(更新:String#chars
不是罪魁祸首——请参阅 mainly_execute_a_trivial_block
的基准。)
编辑:使用1.9.2补丁级别180的当前基准:
require 'pp'
require 'benchmark'
# 700-character benchmark input: "gattaca" repeated 100 times.
def create_genome
  (["gattaca"] * 100).join
end
# Number of iterations each benchmarked method performs.
ZILLION = 100000
# String#count variant: four full scans of the string per iteration,
# but no per-character object allocation and no block dispatch.
def count_frequency_using_count(sequence)
  ZILLION.times do
    %w[a c g t].map { |base| sequence.count(base) }
  end
end
# chars/group_by variant: a single logical pass, but it allocates one string
# per character and yields to a block for every element.
def count_frequency_using_chars(sequence)
  ZILLION.times do
    sequence.chars.group_by { |ch| ch }.map do |letter, occurrences|
      [letter, occurrences.count]
    end
  end
end
# Single-pass variant: folds the characters into a zero-defaulted Hash with
# inject, returning the accumulator from each step.
def count_frequency_using_inject_hash(sequence)
  ZILLION.times do
    sequence.chars.inject(Hash.new(0)) do |counts, ch|
      counts[ch] += 1
      counts
    end
  end
end
# Single-pass variant: like the inject version, but each_with_object carries
# the hash itself, so the block need not return the accumulator.
def count_frequency_using_each_with_object(sequence)
  ZILLION.times do
    sequence.chars.each_with_object(Hash.new(0)) do |ch, counts|
      counts[ch] += 1
    end
  end
end
# Isolates the cost of chars + group_by alone, without the per-group counting.
def just_group_by(sequence)
  ZILLION.times do
    sequence.chars.group_by { |ch| ch }
  end
end
# Isolates the cost of String#chars plus yielding to an empty block.
# NOTE(review): passing a block to String#chars was meaningful on Ruby
# 1.8/1.9 (the era this benchmark targets); later Ruby versions deprecated
# and then dropped the block form — confirm against the Ruby being measured.
def just_chars_and_trivial_block(sequence)
ZILLION.times do
sequence.chars() {}
end
end
# Baseline: yield an empty block sequence.length times per iteration,
# measuring block-dispatch overhead with no String#chars involved.
def mainly_execute_a_trivial_block(sequence)
  ZILLION.times do
    sequence.length.times { }
  end
end
# Baseline: a bare until-loop over the same trip count with no block
# invocation at all — the deliberately manual loop form isolates pure loop
# overhead from block-yield overhead, so it must not be "idiomized" away.
def execute_an_empty_loop_instead(sequence)
ZILLION.times do
i = 0
max = sequence.length
until i == max
i += 1
end
end
end
sequence = create_genome
puts RUBY_VERSION

# Label width keeps the report columns aligned with the longest method name,
# so the output no longer needs hand-written comments to identify each row.
Benchmark.bm(40) do |benchmark|
  benchmark.report("count_frequency_using_count") do
    count_frequency_using_count(sequence)
  end
  benchmark.report("count_frequency_using_chars") do
    count_frequency_using_chars(sequence)
  end
  benchmark.report("count_frequency_using_inject_hash") do
    count_frequency_using_inject_hash(sequence)
  end
  benchmark.report("count_frequency_using_each_with_object") do
    count_frequency_using_each_with_object(sequence)
  end
  benchmark.report("just_group_by") do
    just_group_by(sequence)
  end
  benchmark.report("just_chars_and_trivial_block") do
    just_chars_and_trivial_block(sequence)
  end
  benchmark.report("mainly_execute_a_trivial_block") do
    mainly_execute_a_trivial_block(sequence)
  end
  # BUG FIX: this previously called `execute_an_empty_for_loop_instead`,
  # which is never defined (the method above is `execute_an_empty_loop_instead`),
  # so the last report raised NameError.
  benchmark.report("execute_an_empty_loop_instead") do
    execute_an_empty_loop_instead(sequence)
  end
end
结果:
user system total real
0.510000 0.000000 0.510000 ( 0.508499) # count_frequency_using_count
23.470000 0.030000 23.500000 ( 23.575487) # count_frequency_using_chars
32.770000 0.050000 32.820000 ( 32.874634) # count_frequency_using_inject_hash
31.880000 0.040000 31.920000 ( 31.942437) # count_frequency_using_each_with_object
22.950000 0.030000 22.980000 ( 22.970500) # just_group_by
13.300000 0.020000 13.320000 ( 13.314466) # just_chars_and_trivial_block
5.660000 0.000000 5.660000 ( 5.661516) # mainly_execute_a_trivial_block
1.930000 0.010000 1.940000 ( 1.934861) # execute_an_empty_loop_instead
答案 0 :(得分:2)
这与 Ruby 的内部实现无关。你是在拿苹果和橘子作比较。
在第一个示例中,你把 700 个字符分组了 100000 次并统计计数——这是你的逻辑问题;而在第二种方法中,每次迭代你只是在做计数。
只需像这样更改第一个例子
# Hoists the chars/group_by work out of the loop, so the 100,000 iterations
# only time the per-group counting step.
def count_frequency_using_chars(sequence)
  buckets = sequence.chars.group_by { |ch| ch }
  100000.times do
    buckets.map { |letter, members| [letter, members.count] }
  end
end
它和你的第二个一样快
修改:
这种方法比count_frequency_using_count
快3倍,检查基准
# Groups the characters exactly once up front; the timed loop merely
# re-counts the precomputed groups.
def count_frequency_using_chars_with_single_group(sequence)
  buckets = sequence.chars.group_by { |ch| ch }
  100000.times do
    buckets.map { |letter, members| [letter, members.count] }
  end
end
# Reference implementation: four String#count scans per iteration.
def count_frequency_using_count(sequence)
  100000.times do
    %w[a c g t].map { |base| sequence.count(base) }
  end
end
# Compare the hoisted group_by variant against the String#count variant.
# NOTE(review): `sequence` must already be defined (see create_genome above).
Benchmark.bm do |benchmark|
benchmark.report do
pp count_frequency_using_chars_with_single_group(sequence)
end
benchmark.report do
pp count_frequency_using_count(sequence)
end
end
user system total real
0.410000 0.000000 0.410000 ( 0.419100)
1.330000 0.000000 1.330000 ( 1.324431)
即便改为“对 100000 个序列各测量一次字符组成,而不是对同一个序列测量 100000 次字符组成”
,你的计数方法仍然比group_by方法慢得多。我正如你所说的那样对大字符串进行基准测试
# A 70,000-character sequence ("gattaca" repeated 10,000 times)...
seq = "gattaca" * 10000
# ...and an array holding ten references to that same sequence.
arr_seq = Array.new(10) { seq }
并更改了处理多个序列的方法
# Multi-sequence version: for each sequence, group its characters once and
# then time 100,000 rounds of counting the groups.
# Returns the `sequences` argument (Array#each returns its receiver).
def count_frequency_using_chars_with_single_group(sequences)
  sequences.each do |sequence|
    buckets = sequence.chars.group_by { |ch| ch }
    100000.times do
      buckets.map { |letter, members| [letter, members.count] }
    end
  end
end
# Multi-sequence version: for each sequence, time 100,000 rounds of counting
# the four bases with String#count (four scans per round).
# Returns the `sequences` argument (Array#each returns its receiver).
# BUG FIX: the parameter was named `sequence` while the body iterated
# `sequences`, which raised NameError on every call — renamed to `sequences`.
def count_frequency_using_count(sequences)
  sequences.each do |sequence|
    100000.times do
      ["a", "c", "g", "t"].map { |letter| sequence.count(letter) }
    end
  end
end
# Benchmark both multi-sequence variants over the same ten-sequence array.
Benchmark.bm do |benchmark|
benchmark.report do
pp count_frequency_using_chars_with_single_group(arr_seq)
end
benchmark.report do
pp count_frequency_using_count(arr_seq)
end
end
处理100000次,每次有700个长度的10个序列
user system total real
3.710000 0.040000 3.750000 ( 23.452889) #modified group_by approach
1039.180000 6.920000 1046.100000 (1071.374730) #simple char count approach
对于大批量的长字符串,你的简单字符计数方法比修改后的 group_by 方法慢约 47 倍。我运行了上述基准测试,只用了 10 个序列,每个序列长度为 70000。假设有 100 或 1000 个序列,简单计数永远不会是一个可行的选项。对吧?
答案 1 :(得分:0)
那个慢的东西是group_by。实际上,虽然你需要为count方法执行4次传递,但group_by方法要慢得多,因为为了做那个group_by,它做了很多工作。
稍微破解代码以获得仅通过以下方式执行分组的方法:
# Test genome: 100 copies of "gattaca", i.e. 700 characters in total.
def create_genome
  100.times.map { "gattaca" }.join
end
# chars/group_by frequency tally, repeated 100,000 times (result discarded).
def count_frequency_using_chars(sequence)
  100000.times do
    sequence.chars.group_by { |ch| ch }.map do |letter, occurrences|
      [letter, occurrences.count]
    end
  end
end
# String#count frequency tally (four scans per round), repeated 100,000 times.
def count_frequency_using_count(sequence)
  100000.times do
    %w[a c g t].map { |base| sequence.count(base) }
  end
end
# Times chars + group_by alone, without the per-group counting step.
def just_group_by(sequence)
  100000.times do
    sequence.chars.group_by { |ch| ch }
  end
end
# Shared 700-character input for the timing runs below.
sequence = create_genome
...
ruby-1.9.2-p180 :068 > puts Time.now()
2011-06-17 11:17:36 -0400
ruby-1.9.2-p180 :069 > count_frequency_using_chars(sequence)
=> 100000
ruby-1.9.2-p180 :070 > puts Time.now()
2011-06-17 11:18:07 -0400
ruby-1.9.2-p180 :071 > count_frequency_using_count(sequence)
=> 100000
ruby-1.9.2-p180 :072 > puts Time.now()
2011-06-17 11:18:08 -0400
ruby-1.9.2-p180 :073 > just_group_by(sequence)
=> 100000
ruby-1.9.2-p180 :074 > puts Time.now()
2011-06-17 11:18:37 -0400
你可以看到
虽然使用group_by来获取所需的信息在概念上是好的,但它正在做额外的工作而你不需要做。
答案 2 :(得分:0)
更快更慢的方法是只传递一次数组。
hash = sequence.chars.inject(Hash.new(0)) { |h, e| h[e] += 1 ; h }
=> {"g"=>100, "a"=>300, "t"=>200, "c"=>100}
但实际上它并没有更快。
我还是留下了答案,因为它显示了你如何使用标准库中的Benchmark
。
require 'pp'
require 'benchmark'
# Single-pass frequency count: fold the characters into a zero-defaulted
# hash via inject, repeated 100,000 times (result discarded).
def count_frequency_using_inject_hash(sequence)
  100000.times do
    sequence.chars.inject(Hash.new(0)) do |counts, ch|
      counts[ch] += 1
      counts
    end
  end
end
# Compare the two original approaches against the single-pass inject version.
sequence = create_genome
Benchmark.bm do |benchmark|
benchmark.report do
pp count_frequency_using_chars(sequence)
end
benchmark.report do
pp count_frequency_using_count(sequence)
end
benchmark.report do
pp count_frequency_using_inject_hash(sequence)
end
end
      user     system      total        real
 41.500000   0.000000  41.500000 ( 42.484375)
  1.312000   0.000000   1.312000 (  1.406250)
 49.265000   0.000000  49.265000 ( 49.348633)
答案 3 :(得分:0)
通过分析VM本身,您可以更好地了解正在发生的事情。
被 yield 了太多次的块(block)正是罪魁祸首。如果用 perftools 的分析器对 VM 本身做性能分析,请按照 https://github.com/tmm1/perftools.rb 中“分析 Ruby VM 和 C 扩展”一节列出的说明操作(注意:这里用的或多或少是原版 perftools,而不是 perftools.rb)
Removing _init from all stack traces.
Total: 3883 samples
1321 34.0% 34.0% 3883 100.0% rb_yield_0
273 7.0% 41.1% 274 7.1% _IO_str_pbackfail
191 4.9% 46.0% 191 4.9% __i686.get_pc_thunk.bx
171 4.4% 50.4% 171 4.4% _init
131 3.4% 53.7% 3880 99.9% rb_eval
122 3.1% 56.9% 347 8.9% st_lookup
105 2.7% 59.6% 423 10.9% new_dvar
93 2.4% 62.0% 326 8.4% rb_newobj
89 2.3% 64.3% 89 2.3% _setjmp
77 2.0% 66.3% 400 10.3% str_new
67 1.7% 68.0% 357 9.2% dvar_asgn_internal
63 1.6% 69.6% 204 5.3% malloc
62 1.6% 71.2% 3820 98.4% rb_str_each_char
58 1.5% 72.7% 187 4.8% rb_ary_store
55 1.4% 74.1% 55 1.4% rb_memcmp
55 1.4% 75.5% 3883 100.0% rb_yield
# rest snipped for brevity
正如您所看到的,rb_yield_0
占活动的三分之一以上,因此即使您可以优化其他所有内容,您仍然会比使用String#count
时慢。
您还可以通过执行基准测试来确认这一点,您只需创建一个不执行任何操作的块。
require 'pp'
require 'benchmark'
# Builds the 700-character benchmark genome ("gattaca" x 100).
def create_genome
  Array.new(100, "gattaca").join
end
# Number of iterations each benchmarked method performs.
ZILLION = 100000
# Baseline: yield an empty block sequence.length times per iteration —
# isolates block-dispatch cost with no string work at all.
def mainly_execute_a_trivial_block(sequence)
  ZILLION.times do
    sequence.length.times { }
  end
end
# Baseline: same trip count as above but as a bare until-loop with no block
# yields — the manual loop form is intentional (it is what is being measured)
# and must not be rewritten as an iterator.
def execute_an_empty_loop_instead(sequence)
ZILLION.times do
i = 0
max = sequence.length
until i == max
i += 1
end
end
end
sequence = create_genome
puts RUBY_VERSION
# Compare: an empty block yielded `length` times vs. a bare loop with no yields.
Benchmark.bm do |benchmark|
benchmark.report do
pp mainly_execute_a_trivial_block(sequence)
end
benchmark.report do
pp execute_an_empty_loop_instead(sequence)
end
end
给出
user system total real
5.700000 0.000000 5.700000 ( 5.727715) # mainly_execute_a_trivial_block
1.920000 0.000000 1.920000 ( 1.942096) # execute_an_empty_loop_instead