我正在尝试把一个从 http://www.clear-lines.com/blog/post/Nearest-Neighbor-Classification-Part-2.aspx 复制的 F# 脚本作为 .fs 程序(而不是脚本)来执行。我已经下载了所用到的所有库,并在其他环境中对它们进行了测试,它们都可以正常工作。程序可以正确编译,并将 csv 文件整理到一个数组中,但在以下代码之后就不再继续执行:
let labels = fileAsLines |> Array.map (fun line -> line.[4])
dataset, labels
提前感谢您提供的任何帮助,我经常阅读并经常使用此论坛,并感谢所有指导。
// Learn more about F# at http://fsharp.net
// Code from http://www.clear-lines.com/blog/post/Nearest-Neighbor-Classification-part-2.aspx
open MicrosoftResearch.Infer.Fun.FSharp.Syntax
open MicrosoftResearch.Infer.Fun.FSharp.Inference
open MicrosoftResearch.Infer.Fun.Lib
open MicrosoftResearch.Infer.Maths
open System.IO
open System
open System.Drawing
open MSDN.FSharp.Charting
/// Euclidean distance between two float vectors.
/// Raises ArgumentException when the two arrays differ in length.
let distance v1 v2 =
    // Accumulate the squared component differences left to right,
    // then take the square root of the total.
    let sumOfSquares =
        Array.fold2 (fun acc a b -> acc + pown (a - b) 2) 0.0 v1 v2
    sqrt sumOfSquares
/// Classifies `subject` by majority vote among its nearest rows in `dataset`.
///
/// subject: the vector to classify.
/// dataset: the known vectors, one per row.
/// labels:  the label of each row of `dataset` (same length as `dataset`).
/// k:       how many nearest rows take part in the vote; must be at least 1.
///
/// Returns the winning group as produced by Seq.groupBy: a pair of the label
/// and the sequence of (label, distance) neighbours that carried that label.
///
/// Raises ArgumentException when `k` is below 1, when `labels` and `dataset`
/// differ in length, or when `dataset` is empty (there is nothing to vote on).
let classify subject dataset labels k =
    if k < 1 then invalidArg "k" "k must be at least 1"
    dataset
    |> Array.map (fun row -> distance row subject)
    |> Array.zip labels
    |> Array.sortBy snd
    // Seq.truncate yields at most k items, so a k larger than the dataset
    // votes over every row instead of failing the way Seq.take does.
    |> Seq.truncate k
    |> Seq.groupBy fst
    |> Seq.maxBy (fun (_, votes) -> Seq.length votes)
/// Extracts column `i` of a jagged array: element `i` of every row, in row order.
let column (dataset: float [][]) i =
    [| for row in dataset -> row.[i] |]
/// Turns a jagged array of rows into an array of columns.
/// The number of columns is taken from the first row, so `dataset` must
/// contain at least one row.
let columns (dataset: float [][]) =
    let width = Array.length dataset.[0]
    Array.init width (fun i -> column dataset i)
/// Computes the (minimum, maximum) pair of every column of `dataset`,
/// in column order.
let minMax dataset =
    let bounds (values: float []) = Array.min values, Array.max values
    Array.map bounds (columns dataset)
/// Builds a min-max normalizer from `dataset`.
///
/// The per-column (min, max) bounds are computed once; the returned function
/// rescales a vector component by component as (value - min) / (max - min),
/// so values seen in `dataset` land in [0, 1].
///
/// A column whose min equals its max has no spread to rescale by; its
/// component is mapped to 0.0 rather than dividing by zero.
let minMaxNormalizer dataset =
    let bounds = minMax dataset
    fun (vector: float[]) ->
        bounds
        |> Array.mapi (fun i (lo, hi) ->
            let range = hi - lo
            // Dividing by a zero range would produce NaN or infinity, which
            // then makes every distance computed from the vector unusable.
            if range = 0.0 then 0.0
            else (vector.[i] - lo) / range)
/// Applies `normalizer` to every row of `data` and returns the rescaled rows.
let normalize data (normalizer: float[] -> float[]) =
    Array.map normalizer data
/// Builds a classification function from a labelled training set.
///
/// The training rows are min-max normalized once up front; the returned
/// function normalizes its subject with the same bounds and then classifies
/// it against the normalized rows using `k` neighbours.
let classifier dataset labels k =
    let scale = minMaxNormalizer dataset
    let scaledRows = normalize dataset scale
    fun subject ->
        let scaledSubject = scale subject
        classify scaledSubject scaledRows labels k
/// The election data set, loaded from a comma-separated text file.
///
/// Evaluates to a pair (dataset, labels): for every line of the file,
/// fields 1-3 parsed as floats form one dataset row and field 4 is its label.
/// Because this is a value, not a function, the file is read as soon as the
/// binding is evaluated.
let elections =
    let file = @"C:\Users\Jessica\Dataset\Election2008.txt"
    // Fields are comma-separated, so numbers must use '.' as the decimal mark;
    // parse with the invariant culture instead of the machine's current one.
    let invariant = System.Globalization.CultureInfo.InvariantCulture
    let fileAsLines =
        File.ReadAllLines(file)
        // Blank lines have no fields to index into; skip them.
        |> Array.filter (fun line -> not (String.IsNullOrWhiteSpace line))
        |> Array.map (fun line -> line.Split(','))
    let dataset =
        fileAsLines
        |> Array.map (fun fields ->
            [| Convert.ToDouble(fields.[1], invariant)
               Convert.ToDouble(fields.[2], invariant)
               Convert.ToDouble(fields.[3], invariant) |])
    let labels = fileAsLines |> Array.map (fun fields -> fields.[4])
    dataset, labels
/// Measures classifier accuracy with a simple hold-out split.
///
/// dataset: the feature rows.
/// labels:  the label of each row.
/// k:       the number of neighbours passed on to `classifier`.
/// prop:    the fraction of rows, taken from the start, held out for testing.
///
/// Prints one "predicted actual" line per test row, followed by a summary of
/// how many predictions matched.
let evaluate dataset (labels: string []) k prop =
    let size = Array.length dataset
    let sample = int (floor (float size * prop))
    // The first `sample` rows are the test set; the remainder trains the classifier.
    let testSubjects = dataset.[0 .. sample-1]
    let testLabels = labels.[0 .. sample-1]
    let trainRows = dataset.[sample .. size-1]
    let trainLabels = labels.[sample .. size-1]
    let c = classifier trainRows trainLabels k
    // Pair each predicted label with the expected one.
    let results =
        Array.init testSubjects.Length (fun i ->
            let predicted, _ = c (testSubjects.[i])
            predicted, testLabels.[i])
    for predicted, actual in results do
        printfn "%s %s" predicted actual
    let correct =
        results
        |> Array.sumBy (fun (predicted, actual) -> if predicted = actual then 1 else 0)
    printfn "%i out of %i called correctly" correct sample
答案 0 :(得分:2)
`let elections` 块中的代码之所以会被执行,是因为它被定义为一个值而不是函数(它不接受任何参数,连 unit 值 `()` 也不接受)。这意味着它在声明时就会执行。
脚本中此后出现的唯一代码声明了一个函数(名为 `evaluate`;它看起来很相似,但它需要参数,因此除非调用它并提供所需的参数,否则不会执行),而你并没有任何调用它的代码。
我认为,要让它做到我所理解的你想要的效果,最简单的改动是:
- 从 `evaluate` 的末尾删除 `k` 和 `prop` 参数(这些参数似乎没有被使用);
- 在脚本的最后,使用 `elections` 中存储的值调用 `evaluate` 方法,如下所示:
// Unpack the (dataset, labels) pair that `elections` evaluates to.
let dataset, labels = elections
// Run the evaluation on that data.
// NOTE(review): `evaluate` as defined in the question also takes `k` and `prop`;
// with only two arguments this is a partial application unless those
// parameters are removed as the answer suggests.
evaluate dataset labels
重构代码可能是有意义的,因为在 `elections` 的声明过程中执行代码似乎有点令人困惑;不过一旦代码能够运行,重构并弄清楚到底发生了什么可能会更容易。