我们正在用 F# 编写一个 ETL 流程,它读取存储在关系数据库中的数据,并将其转换为可供第三方平台使用的星型模式。因为我们要对数据进行反规范化,所以系统中散布着(几乎)重复的对象、类型和属性。到目前为止,我对此一直没有意见,因为这些对象之间的差异足以让它们需要各自不同的函数,或者我们可以把公共/共享的属性归入子记录中。
但是,我们现在要添加的对象需要从系统的不同部分中各取所需,并且不属于现有的任何公共分组。在尝试了几种不同的写法之后,我开始使用接口,但是用起来总感觉有些不对劲。有没有人遇到过这个问题并想出了不同的方法?
/// Record models together with the shared-property interfaces (`Props`)
/// they implement. The module is `rec` so the records can refer to `Props`,
/// which is declared after them.
module rec MyModels =

    /// Account record that stores its id and name in `Id` and `Name`.
    type AccountType1 =
        { Id : int
          Error : string option
          Name : string option }
        // PROBLEM: this gets very bulky as more properties are shared
        interface Props.Error<AccountType1> with
            member this.Optic =
                let read () = this.Error
                let write value = { this with Error = value }
                read, write
        // The account id of this record is its own `Id` field.
        interface Props.AccountId<AccountType1> with
            member this.Optic =
                let read () = this.Id
                let write value = { this with Id = value }
                read, write
        // The account name of this record is its `Name` field.
        interface Props.AccountName<AccountType1> with
            member this.Optic =
                let read () = this.Name
                let write value = { this with Name = value }
                read, write

    /// Account record that carries a separate `AccountId` / `AccountName`
    /// next to its own `Id`, plus an extra `OtherValue`.
    type AccountType2 =
        { Id : int
          Error : string option
          AccountId : int
          AccountName : string option
          OtherValue : string }
        interface Props.Error<AccountType2> with
            member this.Optic =
                let read () = this.Error
                let write value = { this with Error = value }
                read, write
        interface Props.AccountId<AccountType2> with
            member this.Optic =
                let read () = this.AccountId
                let write value = { this with AccountId = value }
                read, write
        interface Props.AccountName<AccountType2> with
            member this.Optic =
                let read () = this.AccountName
                let write value = { this with AccountName = value }
                read, write
        interface Props.OtherValue<AccountType2> with
            member this.Optic =
                let read () = this.OtherValue
                let write value = { this with OtherValue = value }
                read, write

    /// Shared property interfaces. Each interface exposes one `Optic`, and the
    /// same-named function extracts that optic from any implementing value.
    module Props =

        /// A getter (`unit -> 'a`) paired with a setter (`'a -> 'b`) that
        /// returns the updated owner.
        type OpticProp<'a,'b> = (unit -> 'a) * ('a -> 'b)

        // Common properties my models can share
        // (I know they should start with an I)

        /// Models that expose an optional error message.
        type Error<'a> =
            abstract member Optic : OpticProp<string option, 'a>
        let Error (h : Error<_>) = h.Optic

        /// Models that expose an integer account id.
        type AccountId<'a> =
            abstract member Optic : OpticProp<int, 'a>
        let AccountId (h : AccountId<_>) = h.Optic

        /// Models that expose an optional account name.
        type AccountName<'a> =
            abstract member Optic : OpticProp<string option, 'a>
        let AccountName (h : AccountName<_>) = h.Optic

        /// Models that expose a string "other value".
        type OtherValue<'a> =
            abstract member Optic : OpticProp<string, 'a>
        let OtherValue (h : OtherValue<_>) = h.Optic
/// Minimal helpers for working with optics, where an optic is a function
/// from an owner value to a (getter, setter) pair.
[<RequireQualifiedAccess>]
module Optics =
    // Based on Aether
    module Operators =
        /// `o ^. optic` reads the value that `optic` focuses on inside `o`.
        let inline (^.) o optic =
            let getter, _ = optic o
            getter ()

        /// `value ^= optic` builds a function that, given an owner `o`,
        /// returns what the optic's setter yields for `value`.
        let inline (^=) value optic =
            fun o ->
                let _, setter = optic o
                setter value

    /// Reads the value that `optic` focuses on inside `o`.
    let inline get optic o = (optic o |> fst) ()

    /// Applies the setter of `optic` for `o` to `v`; the result has the same
    /// type as `o`.
    let inline set optic v (o : 'a) : 'a = (optic o |> snd) v
open MyModels
open Optics.Operators
// Common functions that change the models
/// Stores `Some msg` in the item's `Props.Error` property and returns the
/// updated item wrapped in the `Error` case of `Result`.
let error msg item =
    let flagged = item |> (Some msg) ^= Props.Error
    Error flagged
/// Fills in the item's account name from its account id.
/// Ids 1 and 2 yield `Ok` with the name set to "Account 1" / "Account 2";
/// any other id yields the `Error` result produced by `error`.
let accountName item =
    // Shared success path: set the name and wrap the updated item in Ok.
    let named label =
        item
        |> (Some label) ^= Props.AccountName
        |> Ok
    match item ^. Props.AccountId with
    | 1 -> named "Account 1"
    | 2 -> named "Account 2"
    | _ -> error "Can't find account" item
/// Accepts the item unchanged (as `Ok`) only when its account name is
/// exactly `Some "Account 1"`; otherwise returns the `Error` result produced
/// by `error`.
let correctAccount item =
    if item ^. Props.AccountName = Some "Account 1" then
        Ok item
    else
        error "This is not Account 1" item
/// Calls `lookup ()` and stores its result in the item's `Props.OtherValue`
/// property, returning the updated item as `Ok`.
let otherValue lookup item =
    let value = lookup ()
    let updated = item |> value ^= Props.OtherValue
    Ok updated
// Build the transform pipeline
/// Left-to-right composition of two `Result`-returning functions:
/// runs `a`, feeds an `Ok` payload into `b`, and passes an `Error` through
/// untouched without calling `b`.
let inline (>=>) a b =
    fun value -> a value |> Result.bind b
/// Pipeline for account-1 style items: resolve the account name, then check
/// that it is the expected account. `lookups` is accepted but not used here.
let account1TransformPipeline lookups = // Lookups can be passed around if needed
    fun item -> item |> (accountName >=> correctAccount)
/// Pipeline for account-2 style items: the same two steps as
/// `account1TransformPipeline`, followed by filling in the other value from
/// `lookups`.
let account2TransformPipeline lookups =
    // (>=>) is left-associative, so this is the same composition as
    // accountName >=> correctAccount >=> otherValue lookups.
    account1TransformPipeline lookups
    >=> otherValue lookups
// Try out the pipelines
/// Result of running the account-1 pipeline over a sample `AccountType1`.
let account1 =
    let input : AccountType1 = { Id = 1; Error = None; Name = None }
    account1TransformPipeline () input
/// Result of running the account-2 pipeline over a sample `AccountType2`,
/// with a lookup that always supplies "bar".
let account2 =
    let input : AccountType2 =
        { Id = 1
          Error = None
          AccountId = 1
          AccountName = None
          OtherValue = "foo" }
    let lookup () = "bar"
    account2TransformPipeline lookup input
我尝试过的其他事情:
答案 0 :(得分:21)
我不太确定如何让您的解决方案变得更简单——我认为您的方法在类型上用得过于花哨,这使代码变得相当复杂。在保留一定程度类型检查的同时,可能还有其他简化的方法。同样,我认为在某些情况下,您需要实现的逻辑本身就相当动态,因此即使在 F# 中,也可能值得使用一些更动态的技术。
举个例子,这是一个使用Deedle data frame library进行操作的例子。这样,您就可以将数据表示为数据框(列名称为字符串)。
在数据框上编写所需的两个清理操作相对容易——该库针对基于列的操作进行了优化,因此代码结构与您的略有不同(我们先计算出新列,然后为数据框中的所有行替换该列):
/// Replaces column `nameCol` of `df` with account names derived from the
/// ids in column `idCol`. Raises (via `failwith`) for any id other than
/// 1 or 2.
let correctAccount idCol nameCol df =
    // Maps one id to its account name; the row key is ignored.
    let accountLabel _ id =
        match id with
        | 1 -> "Account 1"
        | 2 -> "Account 2"
        | _ -> failwith "Cannot find account"
    let newNames =
        df
        |> Frame.getCol idCol
        |> Series.map accountLabel
    Frame.replaceCol nameCol newNames df
/// Replaces the "OtherValue" column of `df` with a column whose every row
/// holds `newValue`.
let otherValue newValue df =
    let column = "OtherValue"
    // mapAll is given a function that ignores both key and existing value,
    // so each row maps to Some newValue.
    let newOther =
        df
        |> Frame.getCol column
        |> Series.mapAll (fun _ _ -> Some newValue)
    Frame.replaceCol column newOther df
您的管道随后可以获取记录,将它们转换为数据帧并进行所有处理:
// Account-1 style records: build a frame, then derive "Name" from "Id".
Frame.ofRecords [ { Id = 1; Error = None; Name = None } ]
|> correctAccount "Id" "Name"
// Account-2 style records: build a frame, derive "AccountName" from "Id",
// then overwrite "OtherValue" with "bar".
Frame.ofRecords
    [ { Id = 1
        Error = None
        AccountId = 1
        AccountName = None
        OtherValue = "foo" } ]
|> correctAccount "Id" "AccountName"
|> otherValue "bar"
与您的方法相比,这种做法的类型安全性较低,但我相信人们确实能读懂这段代码,并很好地理解它的作用,这样的取舍可能是值得的。