深度哈希反转算法(最好用 Ruby 实现)

时间:2015-07-31 16:37:06

标签: ruby algorithm hash invert hashalgorithm

我有一个哈希 H(见下),需要对它执行深度反转操作,返回一个新的哈希 H2,其中每个键 K 都是原始哈希 H 中的某个(最内层的)值。H2 中的每个键映射到一个由键序列组成的数组(即数组的数组):把其中任意一个键序列依次应用到原始哈希 H 上,得到的值就是键 K。

也许我应该为输出使用不同的数据结构,例如哈希的哈希(嵌套哈希)?

我希望这适用于任意嵌套级别的哈希。

我不知道从哪里开始设计最佳算法

原始哈希

输入可能是什么样的

{
  u: {
    u: { u: :phe, c: :phe, a: :leu, g: :leu },
    c: { u: :ser, c: :ser, a: :ser, g: :ser },
    a: { u: :tyr, c: :tyr, a: :STOP, g: :STOP },
    g: { u: :cys, c: :cys, a: :STOP, g: :trp }
  },
  c: {
    u: { u: :leu, c: :leu, a: :leu, g: :leu },
    c: { u: :pro, c: :pro, a: :pro, g: :pro },
    a: { u: :his, c: :his, a: :gln, g: :gln },
    g: { u: :arg, c: :arg, a: :arg, g: :arg }
  },
  {...}
}

简化输出

输出结果如何

{
  phe: [[:u,:u,:u],[:u,:u,:c]],
  leu: [[:u,:u,:a],[:u,:u,:g]],
  ser: [[:u,:c,:u],[:u,:c,:c],[:u,:c,:a],[:u,:c,:g]],
  tyr: [[:u,:a,:u],[:u,:a,:c]],
  "...": [[...]]
}

为什么要这样做?我正在编写自己的生物信息学库,希望能够返回给定蛋白质(氨基酸)可能对应的核苷酸序列;蛋白质用三个字符的符号(Symbol,例如 :phe)表示。

1 个答案:

答案 0 :(得分:3)

**代码**

# Deep-inverts a nested hash: every non-Hash value found in +h+ becomes a key
# of the result, mapped to an array of the key paths that lead to that value.
#
# h   - the (possibly nested) hash to invert.
# arr - the key path accumulated so far; callers normally omit it.
#
# Returns a new Hash of the form { value => [[key, key, ...], ...] }.
def recurse(h, arr=[])
  inverted = {}
  h.each do |key, value|
    path = arr + [key]
    # A nested hash is inverted recursively; any other value ends the path.
    partial = value.is_a?(Hash) ? recurse(value, path) : { value => [path] }
    # When a value has already been seen, append the new paths to the old ones.
    inverted.merge!(partial) { |_, existing, incoming| existing + incoming }
  end
  inverted
end

递归过程使用了 Hash#update(又名 merge!)带块的形式:当被合并的两个哈希中存在相同的键时,由块 { |_,o,n| o+n } 决定该键合并后的值(这里是把两个路径数组连接起来)。

示例1

# Example input: a nested hash, three levels deep, whose innermost values are
# symbols. Only the :u and :c top-level branches are included here.
h =
{
  u: {
    u: { u: :phe, c: :phe, a: :leu, g: :leu },
    c: { u: :ser, c: :ser, a: :ser, g: :ser },
    a: { u: :tyr, c: :tyr, a: :STOP, g: :STOP },
    g: { u: :cys, c: :cys, a: :STOP, g: :trp }
  },
  c: {
    u: { u: :leu, c: :leu, a: :leu, g: :leu },
    c: { u: :pro, c: :pro, a: :pro, g: :pro },
    a: { u: :his, c: :his, a: :gln, g: :gln },
    g: { u: :arg, c: :arg, a: :arg, g: :arg }
  },
}

# Invert h: each innermost value becomes a key that maps to the array of key
# paths leading to it in h (the expected result is shown below).
recurse h
  #=> {:phe=>[[:u, :u, :u], [:u, :u, :c]],
  #    :leu=>[[:u, :u, :a], [:u, :u, :g], [:c, :u, :u],
  #      [:c, :u, :c], [:c, :u, :a], [:c, :u, :g]],
  #    :ser=>[[:u, :c, :u], [:u, :c, :c], [:u, :c, :a], [:u, :c, :g]],
  #    :tyr=>[[:u, :a, :u], [:u, :a, :c]],
  #    :STOP=>[[:u, :a, :a], [:u, :a, :g], [:u, :g, :a]],
  #    :cys=>[[:u, :g, :u], [:u, :g, :c]],
  #    :trp=>[[:u, :g, :g]],
  #    :pro=>[[:c, :c, :u], [:c, :c, :c], [:c, :c, :a], [:c, :c, :g]],
  #    :his=>[[:c, :a, :u], [:c, :a, :c]],
  #    :gln=>[[:c, :a, :a], [:c, :a, :g]],
  #    :arg=>[[:c, :g, :u], [:c, :g, :c], [:c, :g, :a], [:c, :g, :g]]}

示例2

# Example input with uneven nesting: most values sit three keys deep, but the
# [:u, :a, :c] branch nests further, so some paths are four or five keys long.
h =
{
  u: {
    u: { u: :phe, a: :leu },
    c: { u: :ser, c: :phe },
    a: { u: :tyr, c: { a: { u: :leu, c: :ser }, u: :tyr } }
  },
  c: {
    u: { u: :leu, c: :pro },
    a: { u: :arg }
  },
}

# Invert h; the key paths in the result have different lengths because the
# input is not uniformly nested (the expected result is shown below).
recurse(h)
  #=> {:phe=>[[:u, :u, :u], [:u, :c, :c]],
  #    :leu=>[[:u, :u, :a], [:u, :a, :c, :a, :u], [:c, :u, :u]],
  #    :ser=>[[:u, :c, :u], [:u, :a, :c, :a, :c]],
  #    :tyr=>[[:u, :a, :u], [:u, :a, :c, :u]],
  #    :pro=>[[:c, :u, :c]], :arg=>[[:c, :a, :u]]}

**解释**

以下是修改后的代码,用于显示正在执行的计算:

# Tracing variant of the deep hash inversion: computes the same result as the
# plain version while printing every step to standard output, indented by two
# spaces per recursion level.
#
# h     - the (possibly nested) hash to invert.
# arr   - the key path accumulated so far; callers normally omit it.
# level - the current recursion depth, used only for indenting the trace.
#
# Returns a new Hash of the form { value => [[key, key, ...], ...] }.
def recurse(h, arr=[], level = 0)
  pad = ' ' * (2 * level)
  inner = "#{pad}  "
  puts "#{pad}level = #{level}"
  puts "#{pad}h= #{h}"
  puts "#{pad}arr= #{arr}"
  result = {}
  h.each do |k, v|
    puts "#{inner}level = #{level}"
    puts "#{inner}k=#{k}"
    puts "#{inner}v=#{v}"
    puts "#{inner}g=#{result}"
    partial =
      if v.is_a?(Hash)
        puts "#{inner}v is Hash"
        # Descend one level, extending the path with the current key.
        recurse(v, arr + [k], level + 1)
      else
        puts "#{inner}v is not a Hash"
        # A non-Hash value ends the path: record the value => path pairing.
        { v => [arr + [k]] }
      end
    # When a value has already been seen, append the new paths to the old ones.
    result.update(partial) { |_, existing, incoming| existing + incoming }
  end
  puts "#{pad}return #{result}"
  result
end

下面是对示例2执行 recurse h 时打印的完整输出(内容很长,仅供愿意逐步跟踪的读者参考)。

level = 0
h= {:u=>{:u=>{:u=>:phe, :a=>:leu}, :c=>{:u=>:ser, :c=>:phe}, :a=>{:u=>:tyr, :c=>{:a=>{:u=>:leu, :c=>:ser}, :u=>:tyr}}}, :c=>{:u=>{:u=>:leu, :c=>:pro}, :a=>{:u=>:arg}}}
arr= []
  level = 0
  k=u
  v={:u=>{:u=>:phe, :a=>:leu}, :c=>{:u=>:ser, :c=>:phe},
     :a=>{:u=>:tyr, :c=>{:a=>{:u=>:leu, :c=>:ser}, :u=>:tyr}}}
  g={}
  v is Hash
  level = 1
  h= {:u=>{:u=>:phe, :a=>:leu}, :c=>{:u=>:ser, :c=>:phe},
      :a=>{:u=>:tyr, :c=>{:a=>{:u=>:leu, :c=>:ser}, :u=>:tyr}}}
  arr= [:u]
    level = 1
    k=u
    v={:u=>:phe, :a=>:leu}
    g={}
    v is Hash
    level = 2
    h= {:u=>:phe, :a=>:leu}
    arr= [:u, :u]
      level = 2
      k=u
      v=phe
      g={}
      v is not a Hash
      level = 2
      k=a
      v=leu
      g={:phe=>[[:u, :u, :u]]}
      v is not a Hash
    return {:phe=>[[:u, :u, :u]], :leu=>[[:u, :u, :a]]}
    level = 1
    k=c
    v={:u=>:ser, :c=>:phe}
    g={:phe=>[[:u, :u, :u]], :leu=>[[:u, :u, :a]]}
    v is Hash
    level = 2
    h= {:u=>:ser, :c=>:phe}
    arr= [:u, :c]
      level = 2
      k=u
      v=ser
      g={}
      v is not a Hash
      level = 2
      k=c
      v=phe
      g={:ser=>[[:u, :c, :u]]}
      v is not a Hash
    return {:ser=>[[:u, :c, :u]], :phe=>[[:u, :c, :c]]}
    level = 1
    k=a
    v={:u=>:tyr, :c=>{:a=>{:u=>:leu, :c=>:ser}, :u=>:tyr}}
    g={:phe=>[[:u, :u, :u], [:u, :c, :c]], :leu=>[[:u, :u, :a]], :ser=>[[:u, :c, :u]]}
    v is Hash
    level = 2
    h= {:u=>:tyr, :c=>{:a=>{:u=>:leu, :c=>:ser}, :u=>:tyr}}
    arr= [:u, :a]
      level = 2
      k=u
      v=tyr
      g={}
      v is not a Hash
      level = 2
      k=c
      v={:a=>{:u=>:leu, :c=>:ser}, :u=>:tyr}
      g={:tyr=>[[:u, :a, :u]]}
      v is Hash
      level = 3
      h= {:a=>{:u=>:leu, :c=>:ser}, :u=>:tyr}
      arr= [:u, :a, :c]
        level = 3
        k=a
        v={:u=>:leu, :c=>:ser}
        g={}
        v is Hash
        level = 4
        h= {:u=>:leu, :c=>:ser}
        arr= [:u, :a, :c, :a]
          level = 4
          k=u
          v=leu
          g={}
          v is not a Hash
          level = 4
          k=c
          v=ser
          g={:leu=>[[:u, :a, :c, :a, :u]]}
          v is not a Hash
        return {:leu=>[[:u, :a, :c, :a, :u]], :ser=>[[:u, :a, :c, :a, :c]]}
        level = 3
        k=u
        v=tyr
        g={:leu=>[[:u, :a, :c, :a, :u]], :ser=>[[:u, :a, :c, :a, :c]]}
        v is not a Hash
      return {:leu=>[[:u, :a, :c, :a, :u]], :ser=>[[:u, :a, :c, :a, :c]],
              :tyr=>[[:u, :a, :c, :u]]}
    return {:tyr=>[[:u, :a, :u], [:u, :a, :c, :u]], :leu=>[[:u, :a, :c, :a, :u]],
            :ser=>[[:u, :a, :c, :a, :c]]}
  return {:phe=>[[:u, :u, :u], [:u, :c, :c]], :leu=>[[:u, :u, :a], [:u, :a, :c, :a, :u]],
          :ser=>[[:u, :c, :u], [:u, :a, :c, :a, :c]], :tyr=>[[:u, :a, :u], [:u, :a, :c, :u]]}
  level = 0
  k=c
  v={:u=>{:u=>:leu, :c=>:pro}, :a=>{:u=>:arg}}
  g={:phe=>[[:u, :u, :u], [:u, :c, :c]], :leu=>[[:u, :u, :a], [:u, :a, :c, :a, :u]],
     :ser=>[[:u, :c, :u], [:u, :a, :c, :a, :c]], :tyr=>[[:u, :a, :u], [:u, :a, :c, :u]]}
  v is Hash
  level = 1
  h= {:u=>{:u=>:leu, :c=>:pro}, :a=>{:u=>:arg}}
  arr= [:c]
    level = 1
    k=u
    v={:u=>:leu, :c=>:pro}
    g={}
    v is Hash
    level = 2
    h= {:u=>:leu, :c=>:pro}
    arr= [:c, :u]
      level = 2
      k=u
      v=leu
      g={}
      v is not a Hash
      level = 2
      k=c
      v=pro
      g={:leu=>[[:c, :u, :u]]}
      v is not a Hash
    return {:leu=>[[:c, :u, :u]], :pro=>[[:c, :u, :c]]}
    level = 1
    k=a
    v={:u=>:arg}
    g={:leu=>[[:c, :u, :u]], :pro=>[[:c, :u, :c]]}
    v is Hash
    level = 2
    h= {:u=>:arg}
    arr= [:c, :a]
      level = 2
      k=u
      v=arg
      g={}
      v is not a Hash
    return {:arg=>[[:c, :a, :u]]}
  return {:leu=>[[:c, :u, :u]], :pro=>[[:c, :u, :c]], :arg=>[[:c, :a, :u]]}
return {:phe=>[[:u, :u, :u], [:u, :c, :c]],
        :leu=>[[:u, :u, :a], [:u, :a, :c, :a, :u], [:c, :u, :u]],
        :ser=>[[:u, :c, :u], [:u, :a, :c, :a, :c]],
        :tyr=>[[:u, :a, :u], [:u, :a, :c, :u]],
        :pro=>[[:c, :u, :c]],
        :arg=>[[:c, :a, :u]]}
  #=> <the last value returned above>