提高返回哈希数组子集时的性能

时间:2018-07-16 22:09:19

标签: ruby

  1. 我正在尝试获取哈希数组的子集,并剔除出现次数 >= 3 的重复名称(:name)
  2. 对于每个 :number 值,返回其中 :cost 最高的哈希。如果同一个 number 下有多条记录的 cost 相同,则返回第一条(number 会被转换为时间戳,用来表示 1..24 小时)

我能够通过以下代码获得结果:

# Sample records: each hash carries a :name, a :number and a :cost.
a = [
  { name: "John Doe1", number: 1, cost: 100 },
  { name: "John Doe3", number: 1, cost: 100 },
  { name: "John Doe1", number: 1, cost: 100 },
  { name: "John Doe1", number: 1, cost: 100 },
  { name: "John Doe1", number: 1, cost: 300 },
  { name: "John Doe2", number: 2, cost: 300 },
  { name: "John Doe1", number: 3, cost: 400 }
]

# Discard every name that appears three or more times, then keep the
# highest-cost record for each number, with the numbers in ascending order.
a.group_by { |record| record[:name] }
 .reject { |_name, records| records.size >= 3 }
 .values
 .flatten
 .sort_by { |record| record[:number] }
 .group_by { |record| record[:number] }
 .values
 .map { |records| records.max_by { |record| record[:cost] } }

  #=> [{:name=>"John Doe3", :number=>1, :cost=>100},
  #    {:name=>"John Doe2", :number=>2, :cost=>300}]

以上代码的时间复杂度是否为 O(n^2)?

有什么更好的方法?

2 个答案:

答案 0 :(得分:2)

这似乎比您当前的解决方案更高效(根据 fruity gem 的基准测试,大约快 20%):

# Keep only names that occur fewer than three times and, for each number,
# remember the record with the greatest cost (the first one seen wins ties).
best_by_number = {}
a.group_by { |record| record[:name] }.each_value do |records|
  next if records.size >= 3

  records.each do |record|
    current = best_by_number[record[:number]]
    best_by_number[record[:number]] = record if current.nil? || current[:cost] < record[:cost]
  end
end
best_by_number.sort_by(&:first).map(&:last) # or just best_by_number.values if the sort is not a requirement
#=> [{:name=>"John Doe3", :number=>1, :cost=>100},
#    {:name=>"John Doe2", :number=>2, :cost=>300}]

首先,我们按名称分组。

然后,我们建立一个新的 Hash:对于成员少于 3 个的每个组,依次遍历其成员,并以成员的 number 为键将其存入 Hash(仅当该键尚不存在,或该成员的 cost 大于当前已存成员的 cost 时)。

然后我们按键(数字)排序并输出值。如果您不需要排序,则可以使用values

基准测试(使用 fruity gem):由于原始帖子包含排序,而被采纳的答案不包含排序,因此对两种情况(排序与不排序)分别进行了基准测试。

代码

# Maps each :number to its highest-cost record, ignoring every name that
# occurs three or more times in +a+.
#
# @param a [Array<Hash>] records with :name, :number and :cost keys
# @return [Hash] number => record with the greatest :cost for that number
def cary_swoveland(a)
  groups_by_name = a.group_by { |record| record[:name] }
  kept_groups = groups_by_name.each_value.reject { |records| records.size >= 3 }
  by_number = kept_groups.flatten.group_by { |record| record[:number] }
  by_number.transform_values { |records| records.max_by { |record| record[:cost] } }
end
# Returns the per-number winners computed by cary_swoveland as a plain
# array, in the order the resulting hash holds them.
def cary_swoveland_no_sort(a)
  winners = cary_swoveland(a)
  winners.values
end
# Returns the per-number winners computed by cary_swoveland, ordered by
# their number.
def cary_swoveland_sorted(a)
  cary_swoveland(a)
    .sort_by { |number, _record| number }
    .map { |_number, record| record }
end
# For every name that occurs fewer than three times, records the
# highest-cost entry seen for each :number.
#
# @param a [Array<Hash>] records with :name, :number and :cost keys
# @return [Hash] number => record with the greatest :cost; on a cost tie
#   the record encountered first is kept
def engineers_mnky(a)
  a.group_by { |h| h[:name] }.each_with_object({}) do |(_, v), obj|
    next if v.size >= 3

    v.each do |sv|
      best = obj[sv[:number]]
      # Strict comparison keeps the earlier record when costs are equal.
      obj[sv[:number]] = sv if best.nil? || best[:cost] < sv[:cost]
    end
  end
end

# Per-number winners from engineers_mnky, in the hash's insertion order.
def engineers_mnky_no_sort(a)
  # The argument must be forwarded; the bare call raised ArgumentError.
  engineers_mnky(a).values
end

# Per-number winners from engineers_mnky, ordered by number.
def engineers_mnky_sorted(a)
  # The argument must be forwarded; the bare call raised ArgumentError.
  engineers_mnky(a).sort_by(&:first).map(&:last)
end

# The question's pipeline without the sort step: drops names occurring
# three or more times, then returns the highest-cost record per number in
# the order the numbers are first encountered.
#
# @param a [Array<Hash>] records with :name, :number and :cost keys
# @return [Array<Hash>] one record per distinct number
def original_post_no_sort(a)
  groups = a.group_by { |record| record[:name] }
  kept = groups.reject { |_name, records| records.size >= 3 }.values.flatten
  kept.group_by { |record| record[:number] }
      .values
      .map { |records| records.max_by { |record| record[:cost] } }
end

# The question's pipeline including the sort step: drops names occurring
# three or more times, then returns the highest-cost record per number,
# ordered by number.
#
# NOTE: Enumerable#sort_by is not guaranteed to be stable, so when several
# kept records share both a number and the top cost, which of them is
# returned is not guaranteed.
#
# @param a [Array<Hash>] records with :name, :number and :cost keys
# @return [Array<Hash>] one record per distinct number, ascending by number
def original_post(a)
  groups = a.group_by { |record| record[:name] }
  kept = groups.reject { |_name, records| records.size >= 3 }.values.flatten
  ordered = kept.sort_by { |record| record[:number] }
  ordered.group_by { |record| record[:number] }
         .values
         .map { |records| records.max_by { |record| record[:cost] } }
end

数据

# Fixed eight-record data set used for the first benchmark run.
a = [
  { name: "John Doe1", number: 1, cost: 100 },
  { name: "John Doe3", number: 2, cost: 400 },
  { name: "John Doe3", number: 1, cost: 100 },
  { name: "John Doe1", number: 1, cost: 100 },
  { name: "John Doe1", number: 1, cost: 100 },
  { name: "John Doe1", number: 1, cost: 300 },
  { name: "John Doe2", number: 2, cost: 300 },
  { name: "John Doe1", number: 3, cost: 400 }
]

结果

# UNSORTED
Running each test 524288 times. Test will take about 4 minutes.
engineers_mnky_no_sort is faster than original_post_no_sort by 30.000000000000004% ± 1.0%
original_post_no_sort is faster than cary_swoveland_no_sort by 10.000000000000009% ± 1.0%

# SORTED 
Running each test 262144 times. Test will take about 3 minutes.
engineers_mnky_sorted is faster than original_post by 19.999999999999996% ± 1.0%
original_post is faster than cary_swoveland_sorted by 10.000000000000009% ± 1.0%

数据

# Builds n random benchmark records; n must be defined before this runs.
# The name suffix is rand(n / 2), the number is sampled from 1..200 and the
# cost is rand(1000).
numbers = (1..200).to_a # built once instead of once per generated record
a = n.times.map do
  { name: "John Doe#{rand(n / 2)}", number: numbers.sample, cost: rand(1000) }
end

结果

# Run 1 (n == 500)
# UNSORTED
Running each test 8192 times. Test will take about 3 minutes.
engineers_mnky_no_sort is faster than original_post_no_sort by 19.999999999999996% ± 1.0%
original_post_no_sort is similar to cary_swoveland_no_sort

# SORTED 
Running each test 8192 times. Test will take about 4 minutes.
engineers_mnky_sorted is faster than cary_swoveland_sorted by 19.999999999999996% ± 1.0%
cary_swoveland_sorted is similar to original_post

# Run 2 (n == 1_000)
# UNSORTED
Running each test 4096 times. Test will take about 3 minutes.
engineers_mnky_no_sort is faster than original_post_no_sort by 10.000000000000009% ± 1.0%
original_post_no_sort is similar to cary_swoveland_no_sort

# SORTED 
Running each test 4096 times. Test will take about 4 minutes.
engineers_mnky_sorted is faster than cary_swoveland_sorted by 10.000000000000009% ± 1.0%
cary_swoveland_sorted is faster than original_post by 10.000000000000009% ± 1.0% 

答案 1 :(得分:2)

a.group_by { |el| el[:name] }.
  each_value.
  reject { |v| v.size >= 3 }.
  flatten.
  group_by { |h| h[:number] }.
  transform_values { |v| v.max_by { |h| h[:cost] }[:name] }
  #=> {1=>"John Doe3", 2=>"John Doe2"}

此返回值似乎与问题中所要求的一致。如果希望每个数字对应的值是 h[:cost] 最大的哈希 h,则只需删除末尾的 [:name]:

  ...
  transform_values { |v| v.max_by { |h| h[:cost] } }
    #=> {1=>{:name=>"John Doe3", :number=>1, :cost=>100},
    #    2=>{:name=>"John Doe2", :number=>2, :cost=>300}}

或者,如果需要数组形式的结果:

  ...
  transform_values { |v| v.max_by { |h| h[:cost] } }.values
    #=> [{:name=>"John Doe3", :number=>1, :cost=>100},
    #    {:name=>"John Doe2", :number=>2, :cost=>300}]

这具有 O(n)(n = a.size)的计算复杂度,因为每个步骤最多需要遍历一次全部 n 个元素。

步骤如下。

首先按 :name 对哈希进行分组,得到下面的 b。我们不需要此哈希的键,因此之后只需取出它的值。

# Group the records by :name; each key maps to the array of hashes that
# share that name.
b = a.group_by { |el| el[:name] }
  #=> {"John Doe1"=>[
  #      {:name=>"John Doe1", :number=>1, :cost=>100},
  #      {:name=>"John Doe1", :number=>1, :cost=>100},
  #      {:name=>"John Doe1", :number=>1, :cost=>100},
  #      {:name=>"John Doe1", :number=>1, :cost=>300},
  #      {:name=>"John Doe1", :number=>3, :cost=>400}
  #    ],
  #    "John Doe3"=>[{:name=>"John Doe3", :number=>1, :cost=>100}],
  #    "John Doe2"=>[{:name=>"John Doe2", :number=>2, :cost=>300}]
  #   }

接着通过 each_value 取出各分组的值(下面的 c),然后删除包含 3 个或更多元素的数组,并将结果展平(d 和 e)。

# Called without a block, each_value returns an enumerator over the grouped
# arrays (the hash values); the keys are not yielded.
c = b.each_value
  #=> #<Enumerator: 
  #     {"John Doe1"=>[
  #        {:name=>"John Doe1", :number=>1, :cost=>100},
  #        {:name=>"John Doe1", :number=>1, :cost=>100},
  #        {:name=>"John Doe1", :number=>1, :cost=>100},
  #        {:name=>"John Doe1", :number=>1, :cost=>300},
  #        {:name=>"John Doe1", :number=>3, :cost=>400}
  #      ],
  #      "John Doe3"=>[{:name=>"John Doe3", :number=>1, :cost=>100}],
  #      "John Doe2"=>[{:name=>"John Doe2", :number=>2, :cost=>300}]
  #     }:each_value>

d = c.reject { |v| v.size >= 3 }
  #=> [
  #     [{:name=>"John Doe3", :number=>1, :cost=>100}],
  #     [{:name=>"John Doe2", :number=>2, :cost=>300}]]
e = d.flatten
  #=> [{:name=>"John Doe3", :number=>1, :cost=>100},
  #    {:name=>"John Doe2", :number=>2, :cost=>300}]

我们需要再次按键 :number 对哈希进行分组。

f = e.group_by { |h| h[:number] }
  #=> {1=>[{:name=>"John Doe3", :number=>1, :cost=>100}],
  #    2=>[{:name=>"John Doe2", :number=>2, :cost=>300}]}

最后,对于 f 的每个值(哈希数组),选择 h[:cost] 最大的元素 h。

f.transform_values { |v| v.max_by { |h| h[:cost] } }
  #=> {1=>{:name=>"John Doe3", :number=>1, :cost=>100},
  #    2=>{:name=>"John Doe2", :number=>2, :cost=>300}}