我正在通过HAP(HtmlAgilityPack)解析HTML,目前需要解析表格中每一行的各列内容(即一组TD元素)。
注意:这里没有使用FSharp.Data的HTML解析器,因为它在处理包含<script>
代码的HTML时会出错,导致CSS选择器失败(已知问题)。
我想把一行数据映射到下面这个类型(10个类型各异的“列”):
/// One table row mapped onto ten positional fields, C0 through C9.
/// The field index matches the zero-based column position used by `dailyRow`.
type DailyRow =
    {
        /// Column 0, stored as text.
        C0: string
        /// Column 1, stored as text.
        C1: string
        /// Column 2, stored as an integer.
        C2: int
        /// Column 3, stored as a decimal.
        C3: decimal
        /// Column 4, stored as text.
        C4: string
        /// Column 5, stored as text.
        C5: string
        /// Column 6, stored as an integer.
        C6: int
        /// Column 7, stored as a decimal.
        C7: decimal
        /// Column 8, stored as a decimal.
        C8: decimal
        /// Column 9, stored as an integer.
        C9: int
    }
下面这个函数虽然难看但能用,它根据列位置把值写入对应的记录字段(是的,任何无法正确解析的内容都会直接抛出异常):
/// Returns a copy of `record` in which the field selected by the zero-based
/// `column` index is replaced with the parsed inner text of `node`.
///
/// Columns 0, 1, 4 and 5 keep the text as-is; 2 and 9 are converted with `int`;
/// 6 is parsed with `NumberStyles.AllowThousands`; 3, 7 and 8 are parsed with
/// `NumberStyles.Currency`. Parse failures propagate as exceptions.
/// Raises `System.MissingFieldException` for any column index outside 0..9.
let dailyRow (record: DailyRow, column: int, node: HtmlNode) =
    // Read the cell text once; every branch below works from this value.
    let text = node.InnerText
    let asCurrency () = Decimal.Parse(text, NumberStyles.Currency)
    let asGroupedInt () = Int32.Parse(text, NumberStyles.AllowThousands)

    // Trace output, emitted for every call (including unknown columns).
    printfn "dailyRow: Column %i has value %s" column text

    match column with
    | 0 -> { record with C0 = text }
    | 1 -> { record with C1 = text }
    | 2 -> { record with C2 = int text }
    | 3 -> { record with C3 = asCurrency () }
    | 4 -> { record with C4 = text }
    | 5 -> { record with C5 = text }
    | 6 -> { record with C6 = asGroupedInt () }
    | 7 -> { record with C7 = asCurrency () }
    | 8 -> { record with C8 = asCurrency () }
    | 9 -> { record with C9 = int text }
    | _ ->
        // An index beyond the ten known columns is treated as a hard error.
        raise (System.MissingFieldException("New Field in Chart Data Found: " + column.ToString()))
一些测试代码:
// Seed record with a default value in every field.
let chartRow =
    { C0 = ""
      C1 = ""
      C2 = 0
      C3 = 0.0M
      C4 = ""
      C5 = ""
      C6 = 0
      C7 = 0.0M
      C8 = 0.0M
      C9 = 0 }

// The TD cells of a single row (1 row of 10 columns).
let columnsToParse = row.SelectNodes("td")

// Each cell is applied to the SAME seed record, so this lazily yields one record
// per column, each with only that column's field populated.
let x =
    columnsToParse
    |> Seq.mapi (fun index cell -> dailyRow (chartRow, index, cell))
问题在于:我传入的是不可变记录,并通过Seq.mapi(用索引对应列号)
从dailyRow函数接收新记录,因此最终会得到10条记录,而每条记录只设置了其中一个字段的值。
在C#中,我只需向dailyRow传入
一个以ref方式传递的对象并就地更新它;那么在F#中,惯用的处理方式是什么?
答案 0 :(得分:1)
如果您不介意额外分配一个数组,最简单的做法是:
// Placeholder from the answer: stands in for the sequence of TD nodes of one row.
let nodes = seq [...]

// Materialise the inner text of every node so cells can be indexed by column.
let arr =
    nodes
    |> Seq.map (fun cell -> cell.InnerText)
    |> Seq.toArray

// Build the whole record in a single expression, one array slot per field.
// Indexing raises if the array holds fewer than ten entries.
let record =
    let currency (text: string) = Decimal.Parse(text, NumberStyles.Currency)
    let groupedInt (text: string) = Int32.Parse(text, NumberStyles.AllowThousands)

    { C0 = arr.[0]
      C1 = arr.[1]
      C2 = int arr.[2]
      C3 = currency arr.[3]
      C4 = arr.[4]
      C5 = arr.[5]
      C6 = groupedInt arr.[6]
      C7 = currency arr.[7]
      C8 = currency arr.[8]
      C9 = int arr.[9] }