Question

我正在用F#开发一门语言，测试后发现运行时有90％以上的时间花在相等性比较上，因此这门语言慢到无法使用。在性能分析期间，GetHashCode函数在开销来源列表中排名相当靠前。原因是在方法调用期间，我把方法体（Expr）连同调用参数一起用作字典中的键，这会触发对AST片段的重复遍历。

为了提高性能，我想在AST中添加memoization节点。

/// Simplified expression AST used in the question to illustrate the idea.
type Expr =
// Addition of two sub-expressions.
| Add of Expr * Expr
// Integer literal.
| Lit of int
// Proposed memoization node: an int paired with the Expr it wraps.
// Per the question text, the int is meant to hold that Expr's hash value.
| HashNode of int * Expr

在上面的简化示例中，我想要的是HashNode表示其Expr的哈希值，因此GetHashCode不必在AST中更深入地行进以便计算它

话虽如此，我不知道如何重写GetHashCode方法。理想情况下，我希望重用内置的哈希方法，并让它以某种方式只忽略HashNode，但我不知道该怎么做。

更有可能的是，我将不得不创建自己的哈希函数，但不幸的是我对哈希函数一无所知，所以我现在有点迷失。

我的另一个想法是用唯一ID替换节点，同时保持该哈希函数不变，但这会在代码中引入额外的复杂性，除非迫不得已，我宁愿避免这样做。

Answer 1

我最近在TheGamma（GitHub）中需要一个类似的东西，我在其中构建一个依赖图（类似于AST），它经常被重新创建（当你在编辑器中更改代码并重新解析时），但是我的实时预览可能需要一些时间来计算，所以我想尽可能多地重用上一个图表。

我的做法是给每个节点附加一个“符号”。具有相同符号的两个节点是相同的，我认为您可以利用这些符号进行高效的相等性测试：

/// Expression shapes; children are referenced through ExprNode wrappers.
type Expr =
  | Add of ExprNode * ExprNode
  | Lit of int

/// Wraps an expression together with an integer symbol.
/// Hashing and equality look only at the symbol, never at the wrapped
/// expression, so neither operation walks the tree.
and ExprNode(expr:Expr, symbol:int) = 
  /// The wrapped expression.
  member this.Expression = expr
  /// The integer symbol identifying this node.
  member this.Symbol = symbol
  /// The symbol doubles as the hash code.
  override this.GetHashCode() = symbol
  /// True only when the other object is an ExprNode carrying the same symbol.
  override this.Equals(other) = 
    match other with 
    | :? ExprNode as node -> node.Symbol = symbol
    | _ -> false

我确实保留了节点的缓存——缓存的键由节点种类的代码（在下面的代码中Lit为0，Add为1，等等）和所有嵌套节点的符号组成。对于字面量，我还把数字本身加入键中，这意味着两次创建相同的字面量会得到同一个节点。因此，创建节点的代码如下所示：

/// Returns the ExprNode for `expr`, reusing a node stored in `ctx` when one
/// already exists under the same key, and otherwise creating a node with a
/// fresh id from nextId() and storing it in `ctx`.
let node expr ctx =  
  // The cache key starts with a code for the expression kind (0 = Lit,
  // 1 = Add), followed by the literal value or the symbols of the children.
  let cacheKey = 
    match expr with 
    | Lit value -> [0; value]
    | Add(left, right) -> [1; left.Symbol; right.Symbol]
  match ListDictionary.tryFind cacheKey ctx with
  | None ->
      // Not cached yet: allocate a node under a new symbol and remember it.
      let fresh = ExprNode(expr, nextId())
      ListDictionary.set cacheKey fresh ctx
      fresh
  | Some cached -> cached

ListDictionary模块是一个可变字典，其中键是整数列表，nextId是生成下一个ID的常用函数：

/// One node of a dictionary keyed by lists: it carries an optional value
/// plus a table of child nodes indexed by a single key element.
type ListDictionaryNode<'K, 'T> = 
  { mutable Result : 'T option  // value stored at this node, or None if nothing has been set here
    Nested : Dictionary<'K, ListDictionaryNode<'K, 'T>> }  // child nodes, indexed by one key element

/// Top level of the list-keyed dictionary: maps a key element to its node.
type ListDictionary<'K, 'V> = Dictionary<'K, ListDictionaryNode<'K, 'V>>

/// Operations on a mutable dictionary whose keys are lists. Each key element
/// selects one level of nested dictionaries; the value is stored on the node
/// reached by the last element.
[<CompilationRepresentation(CompilationRepresentationFlags.ModuleSuffix)>]
module ListDictionary = 
  /// Looks up the value stored under the key list `ks` in `dict`.
  /// Returns `Some value` when every key element is present and the final
  /// node holds a value, otherwise `None`. An empty key always yields `None`,
  /// because the synthetic root node created below carries no value.
  let tryFind ks dict = 
    let rec loop ks (node:ListDictionaryNode<_, _>) =
      match ks with
      | [] -> node.Result
      | k::ks ->
          // TryGetValue performs a single hash lookup, instead of the
          // ContainsKey check followed by a second lookup through the indexer.
          match node.Nested.TryGetValue k with
          | true, child -> loop ks child
          | _ -> None
    // Wrap the top-level dictionary in a value-less root node so that the
    // loop can treat every level uniformly.
    loop ks { Nested = dict; Result = None }

  /// Stores `v` under the key list `ks` in `dict`, creating intermediate
  /// nodes as needed and overwriting any value already stored for that key.
  /// Fails with "Empty key not supported" when `ks` is empty.
  let set ks v dict =
    let rec loop ks (dict:ListDictionary<_, _>) = 
      match ks with
      | [] -> failwith "Empty key not supported"
      | k::ks ->
          // Fetch the node for this key element with one lookup, adding an
          // empty node first when the element has not been seen before.
          let node =
            match dict.TryGetValue k with
            | true, existing -> existing
            | _ ->
                let created = { Nested = Dictionary<_, _>(); Result = None }
                dict.[k] <- created
                created
          if List.isEmpty ks then node.Result <- Some v
          else loop ks node.Nested
    loop ks dict


/// Returns a new integer id on every call: 1 on the first call, then 2, 3, ...
/// The counter is kept in a private reference cell captured by the closure.
let nextId = 
  let counter = ref 0
  fun () ->
    counter.Value <- counter.Value + 1
    counter.Value

所以，我想说的是你需要实现自己的缓存机制，不过这种做法对我来说效果很好，也许能提示你如何在你的情况下这样做！

如何缓存AST的哈希码？

1 个答案: