F#程序未运行至完成

时间:2013-11-02 04:56:47

标签: algorithm f# k-means

我正在尝试把从 http://www.clear-lines.com/blog/post/Nearest-Neighbor-Classification-Part-2.aspx 复制的F#脚本作为编译后的 .fs 程序(而不是脚本)来运行。我已经下载了所用到的全部库,并在其他环境中对它们进行了测试,它们都可以正常工作。程序可以正确编译,并把csv文件读入数组,但在执行完以下代码之后就不再继续运行:

let labels = fileAsLines |> Array.map (fun line -> line.[4])
dataset, labels

提前感谢您提供的任何帮助,我经常阅读并经常使用此论坛,并感谢所有指导。

// Learn more about F# at http://fsharp.net
// Code from http://www.clear-lines.com/blog/post/Nearest-Neighbor-Classification-part-2.aspx

open MicrosoftResearch.Infer.Fun.FSharp.Syntax
open MicrosoftResearch.Infer.Fun.FSharp.Inference
open MicrosoftResearch.Infer.Fun.Lib
open MicrosoftResearch.Infer.Maths
open System.IO
open System
open System.Drawing
open MSDN.FSharp.Charting

/// Euclidean distance between two float vectors of equal length.
/// Walks both arrays in lock-step, accumulating squared differences left to right.
let distance v1 v2 =
    let sumOfSquares =
        Array.fold2
            (fun acc (a: float) (b: float) ->
                let delta = a - b
                acc + delta * delta)
            0.0
            v1
            v2
    sqrt sumOfSquares

/// Classifies `subject` by majority vote among its `k` nearest rows of `dataset`.
///
/// dataset : array of feature vectors (one row per known observation)
/// labels  : label of each row, in the same order as `dataset`
/// k       : number of nearest neighbours allowed to vote
///
/// Returns the winning group as produced by Seq.groupBy: a pair of the label and
/// the sequence of (label, distance) neighbours that voted for it.
/// Raises if `dataset` and `labels` differ in length (Array.zip) or if no
/// neighbour is available to vote (Seq.maxBy on an empty sequence).
let classify subject dataset labels k =
    dataset
    |> Array.map (fun row -> distance row subject)
    |> Array.zip labels
    |> Array.sortBy snd
    // Seq.truncate, unlike Seq.take, does not throw when k exceeds the number of
    // rows; in that case every available row simply takes part in the vote.
    |> Seq.truncate k
    |> Seq.groupBy fst
    |> Seq.maxBy (fun (_, votes) -> Seq.length votes)
/// Extracts the i-th feature of every row, i.e. column `i` of the row-major dataset.
let column (dataset: float [][]) i =
    [| for row in dataset -> row.[i] |]

/// Transposes the dataset: returns one array per feature column.
/// The column count is taken from the first row, so the dataset must be non-empty.
let columns (dataset: float [][]) =
    let width = Array.length dataset.[0]
    Array.init width (fun index -> column dataset index)

/// Computes the (minimum, maximum) pair of every feature column of the dataset.
let minMax dataset =
    columns dataset
    |> Array.map (fun values -> (Array.min values, Array.max values))

/// Builds a min-max normalizer from `dataset`.
///
/// The per-column bounds are computed once, up front; the returned function
/// rescales each component of a vector with (x - min) / (max - min), so values
/// seen in `dataset` map into [0, 1].
let minMaxNormalizer dataset =
    let bounds = minMax dataset
    fun (vector: float[]) ->
        bounds
        |> Array.mapi (fun i (lo, hi) ->
            let value = vector.[i]
            let range = hi - lo
            // A constant column has zero range: dividing would produce NaN or
            // infinity and poison every distance computed afterwards. Such a
            // feature carries no information, so it is mapped to 0.0.
            if range = 0.0 then 0.0 else (value - lo) / range)

/// Applies `normalizer` to every row of `data` and returns the rescaled rows.
let normalize data (normalizer: float[] -> float[]) =
    Array.map normalizer data

/// Trains a k-nearest-neighbour classifier on `dataset` / `labels`.
///
/// The training rows are min-max normalized once; the returned function
/// normalizes a subject with the same bounds and classifies it against them.
let classifier dataset labels k =
    let scale = minMaxNormalizer dataset
    let scaledTraining = normalize dataset scale
    fun subject ->
        let scaledSubject = scale subject
        classify scaledSubject scaledTraining labels k

/// Loads the election data set as a (features, labels) pair.
///
/// This is a value, not a function: the file is read once, when the binding is
/// evaluated. Each non-blank line is split on ','; fields 1..3 become the
/// numeric feature vector and field 4 becomes the label.
let elections =
    let file = @"C:\Users\Jessica\Dataset\Election2008.txt"
    // Fields are comma-separated, so decimals must use '.'; parse with the
    // invariant culture so the result does not depend on the machine's locale.
    let invariant = System.Globalization.CultureInfo.InvariantCulture
    let parse (field: string) = Convert.ToDouble(field, invariant)
    let rows =
        File.ReadAllLines(file)
        // Skip blank lines, which would otherwise fail on the field lookups below.
        |> Array.filter (fun line -> not (String.IsNullOrWhiteSpace line))
        |> Array.map (fun line -> line.Split(','))
    let dataset =
        rows
        |> Array.map (fun fields ->
            [| parse fields.[1]
               parse fields.[2]
               parse fields.[3] |])
    let labels = rows |> Array.map (fun fields -> fields.[4])
    dataset, labels

/// Measures classifier quality on a hold-out split and prints the outcome.
///
/// The first floor(size * prop) rows are held out as test subjects; the
/// remaining rows train a k-nearest-neighbour classifier. For every test
/// subject the predicted and actual labels are printed, followed by a summary
/// line with the number of correct predictions.
let evaluate dataset (labels: string []) k prop =
    let size = Array.length dataset
    // Number of leading rows reserved for testing.
    let sample = int (floor (float size * prop))
    let testSubjects = dataset.[0 .. sample - 1]
    let testLabels = labels.[0 .. sample - 1]
    let trainSubjects = dataset.[sample .. size - 1]
    let trainLabels = labels.[sample .. size - 1]
    let predict = classifier trainSubjects trainLabels k
    // Pair each prediction with the known label of the same test row.
    let results =
        testSubjects
        |> Array.mapi (fun i subject ->
            let predicted, _ = predict subject
            predicted, testLabels.[i])
    for (predicted, actual) in results do
        printfn "%s %s" predicted actual
    let correct =
        results
        |> Array.sumBy (fun (predicted, actual) -> if predicted = actual then 1 else 0)
    printfn "%i out of %i called correctly" correct sample

1 个答案:

答案 0 :(得分:2)

let elections块中的代码之所以会被执行,是因为它被定义为值而不是函数(它不接受任何参数,连unit值()也不接受)。这意味着它在声明时就会执行。

脚本中在此之后出现的唯一代码只是声明了一个函数(名为evaluate;它看起来很相似,但它带有参数,因此除非调用它并提供所需的参数,否则不会执行),而你没有任何代码去调用它。

我认为最简单的改变就是让它做我认为你想做的事情:

  1. 从函数evaluate的末尾删除k和prop参数(似乎没有使用这些参数)
  2. 在脚本的最后,使用elections中存储的值调用evaluate函数,如下所示:

    // Destructure the (dataset, labels) tuple held by the `elections` value.
    let dataset, labels = elections

    // NOTE(review): this call assumes `evaluate` has been reduced to two
    // parameters as described in step 1; confirm against the function's
    // actual signature before relying on it.
    evaluate dataset labels

  3. 重构代码可能是有意义的,因为在声明elections的过程中就执行代码似乎有点令人困惑;不过一旦代码能够正常运行,重新组织代码并理解其中发生的事情可能会更容易。