我正在学习F#(纯粹出于兴趣,并非为了工作/学业),并尝试编写一个简单的解析器,用来统计某个Windows Phone应用在多个市场中的评论数量。毫无疑问,我目前写出的代码很丑陋,但我正在努力改进它,并遵循函数式编程范式。由于我来自C、C++、C#的世界,这对我来说很难。
来自C世界的我习惯使用空值(null)。我知道函数式编程/F#不鼓励使用null,但我找不到不使用它的办法。例如,函数 parse 中有一个空值检查。怎样才能避免这样做?
现在我的代码只统计第一页上的评论数量,但应用可能有超过10条评论,因此可能会有多个页面。我该如何(在函数 downloadReviews 或 parse 中)递归地遍历所有页面?
我们如何将此代码扩展为完全异步?
以下是我到目前为止的代码。除了上面的问题,我真的希望有人能帮助我,并告诉我如何改进我的代码的整体结构。
open System
open System.IO
open System.Xml
open System.Xml.Linq
open Printf
/// Outcome of attempting to download a single page.
type DownloadPageResult =
    { /// Address that was requested.
      Uri: Uri
      /// True when the download raised an exception.
      ErrorOccured: bool
      /// Text of the downloaded page; empty when the download failed.
      Source: string }
/// Summary of one parsed review page.
type ReviewData =
    { /// Address of the page the count was taken from.
      CurrentPageUri: Uri
      /// Address of the following page; null when there is none.
      NextPageUri: Uri
      /// Number of review entries counted on the page.
      NumberOfReviews: int }
/// Builds URIs for the Windows Phone marketplace review service.
module ReviewUrl =
    /// Root of the service; ends with exactly one slash.
    let private root = "http://cdn.marketplaceedgeservice.windowsphone.com/"

    /// Returns the absolute URI of `path` on the marketplace host.
    /// Leading slashes in `path` are dropped, so both "v8/x" and "/v8/x"
    /// map to ".../v8/x" instead of producing an empty path segment
    /// (".../​/v8/x"). A null `path` is treated as empty.
    let getBaseUri (path: string) =
        let relative =
            match path with
            | null -> ""
            | p -> p.TrimStart('/')
        new Uri(root + relative)

    /// Returns the URI of the review feed of application `appId` for the
    /// given country code and locale, requesting chunks of 10 reviews.
    let getUri country locale appId =
        sprintf "/v8/ratings/product/%s/reviews?os=8.0.0.0&cc=%s&oc=&lang=%s&hw=520170499&dm=Test&chunksize=10" appId country locale
        |> getBaseUri
/// Downloads the document at `uri` and returns its text.
/// The requested address is echoed to standard output first.
/// Failures never propagate: any exception is logged to standard error and
/// reported as `ErrorOccured = true` with an empty `Source`.
let downloadPage (uri: System.Uri) =
    try
        use webClient = new System.Net.WebClient()
        printfn "%s" (uri.ToString())
        webClient.Headers.Add("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8")
        // No Accept-Encoding header: WebClient does not decompress response
        // bodies, so advertising compressed encodings could make
        // DownloadString return undecodable text.
        webClient.Headers.Add("Accept-Language", "en-US,en;q=0.8,fr;q=0.6")
        webClient.Headers.Add("User-Agent", "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/28.0.1482.0 Safari/537.36")
        { Uri = uri; Source = webClient.DownloadString(uri); ErrorOccured = false }
    with error ->
        // Keep the best-effort contract, but leave a trace of what went wrong.
        eprintfn "Download of %O failed: %s" uri error.Message
        { Uri = uri; Source = String.Empty; ErrorOccured = true }
/// Downloads the review page that `ReviewUrl.getUri` addresses for the
/// given country, locale and application id.
let downloadReview country locale appId =
    ReviewUrl.getUri country locale appId |> downloadPage
/// Counts the Atom `entry` elements of a downloaded page and locates the
/// address of the next page.
/// A failed download yields zero reviews and a null `NextPageUri`.
/// `NextPageUri` is also null when the page has no `link` element with
/// `rel="next"`, or when that link carries no address.
let parse(pageResult: DownloadPageResult) =
    if pageResult.ErrorOccured then
        { CurrentPageUri = pageResult.Uri; NextPageUri = null; NumberOfReviews = 0 }
    else
        // `use` disposes the reader when this branch completes.
        use reader = new StringReader(pageResult.Source)
        let doc = XDocument.Load(reader)
        let ns = XNamespace.Get("http://www.w3.org/2005/Atom")
        // XElement.Attribute returns null for a missing attribute, so guard
        // before reading `.Value`.
        let attributeValue name (element: XElement) =
            match element.Attribute(XName.Get name) with
            | null -> null
            | attribute -> attribute.Value
        let nextUrl =
            doc.Descendants(ns + "link")
            |> Seq.tryFind (fun link -> attributeValue "rel" link = "next")
            |> Option.map (fun link ->
                // The target of a link is its `href` attribute; fall back to
                // the element text only when the attribute is absent.
                match attributeValue "href" link with
                | null -> link.Value
                | href -> href)
        { CurrentPageUri = pageResult.Uri
          NextPageUri =
              match nextUrl with
              | Some url when not (String.IsNullOrEmpty url) -> ReviewUrl.getBaseUri url
              | _ -> null
          NumberOfReviews = doc.Descendants(ns + "entry") |> Seq.length }
/// Downloads and parses the review page of the hard-coded application for
/// `locale`. The country code is whatever follows the first three
/// characters of `locale` (e.g. "US" for "en-US").
let downloadReviews(locale: string) =
    let country = locale.Substring(3)
    downloadReview country locale "4e08377c-1240-4f80-9c35-0bacde2c66b6"
    |> parse
/// Entry point: fetches the review summary for each locale in turn, prints
/// the resulting array and exits with code 0.
[<EntryPoint>]
let main argv =
    [| "en-US"; "en-GB" |]
    |> Array.map downloadReviews
    |> printfn "%A"
    0
答案 0 :(得分:10)
我正在更多地讨论这个问题并尝试使用XML类型提供程序和F#Data的其他功能。它不是完整的代码,但它应该足以给你一个想法(并显示类型提供者非常好: - )):
首先,我需要一些参考资料:
#r "System.Xml.Linq.dll"
#r "FSharp.Data.dll"
open FSharp.Data
open FSharp.Net
接下来,我编写了以下代码来下载一个示例页面。
/// Response of a single request for the en-US review feed of the sample
/// application.
let data =
    let url =
        "http://cdn.marketplaceedgeservice.windowsphone.com//v8/ratings/product/4e08377c-1240-4f80-9c35-0bacde2c66b6/reviews"
    let queryParameters =
        [ "os", "8.0.0.0"
          "cc", "US"
          "lang", "en-US"
          "hw", "520170499"
          "dm", "Test"
          "chunksize", "10" ]
    Http.Request(url, query = queryParameters, headers = [ "User-Agent", "F#" ])
我将示例保存为D:\temp\appstore.xml
,然后使用XML类型提供程序获取用于解析页面的好类型:
// Type produced by the XML type provider from the sample document saved at
// D:\temp\appstore.xml; used below to parse downloaded pages.
type PageDocument = XmlProvider< @"D:\temp\appstore.xml" >
然后你可以像这样下载并解析页面(这里展示了如何获取评论数量以及下一页链接的信息):
/// Asynchronously downloads the review page of `appId` for `locale` and
/// returns a pair: the number of review entries on the page, and the
/// `Href` of the link whose `Rel` is "next" (None when there is no such link).
/// The country code is whatever follows the first three characters of `locale`.
let parseAsync (locale:string) appId =
    async {
        let country = locale.Substring(3)
        let endpoint =
            "http://cdn.marketplaceedgeservice.windowsphone.com//v8/ratings/product/"
            + appId + "/reviews"
        let parameters =
            [ "os", "8.0.0.0"
              "cc", country
              "lang", locale
              "hw", "520170499"
              "dm", "Test"
              "chunksize", "10" ]
        // Issue the request without blocking the calling thread.
        let! body =
            Http.AsyncRequest(endpoint, query = parameters, headers = [ "User-Agent", "F#" ])
        // Parse the response with the type generated by the type provider.
        let page = PageDocument.Parse(body)
        // Pick the address of the first link marked as "next", if any.
        let nextLink =
            page.GetLinks()
            |> Seq.tryPick (fun link ->
                if link.Rel = "next" then Some link.Href else None)
        let reviewsCount = page.GetEntries().Length
        return (reviewsCount, nextLink)
    }
答案 1 :(得分:2)
使代码异步的一般模式是:(在调用树的某个位置)找到开销较大的I/O操作,然后从那里“向上”,把所有使用它的代码也改为异步,直到到达需要阻塞等待的位置。
在您的示例中,最基本的操作是下载,因此首先要把 downloadPage 改为异步:
/// Asynchronous variant of the page download: yields the text at `uri`, or
/// a result flagged with `ErrorOccured = true` and an empty `Source` when
/// any exception is raised.
let downloadPage (uri: System.Uri) =
    async {
        try
            use client = new System.Net.WebClient()
            uri.ToString() |> printfn "%s"
            // (Headers omitted)
            let! body = client.AsyncDownloadString(uri)
            return { Uri = uri; Source = body; ErrorOccured = false }
        with _ ->
            return { Uri = uri; Source = String.Empty; ErrorOccured = true }
    }
您需要将代码包装在 async { ... } 中,使用 let! 调用 DownloadString 的异步版本,并使用 return(在两个分支中)返回结果。
然后你还需要把 downloadReview 和 downloadReviews 这样的函数也改为异步(同样,将它们包装在 async 块中,并使用 let! 或 return! 调用 downloadPage 之类的其他异步操作)。
最后,如果您正在编写控制台应用程序,则需要阻塞等待,但您可以并行运行不同语言环境的下载。假设 downloadReviews 是异步的:
let locales = [| "en-US"; "en-GB"; |]
let results =
locales
|> Array.map downloadReviews // Build an array of asynchronous computations
|> Async.Parallel // Compose them into a single, parallel computation
|> Async.RunSynchronously // Run the computation and wait
要回答其他问题,我认为在上面的示例中使用 null 可能没问题(您正在调用会返回 null 的LINQ,因此没有简单的方法可以避免这种情况)。实际上可以使用选项类型,但这有点棘手 - 如果您感兴趣,请参阅这个代码片段(this snippet)。
此外,您可以使用F# Data Library中的 Http.Request 方法,这样可以更简单地构建复杂的HTTP请求(但我是该库的贡献者之一,所以我有所偏向!)
答案 2 :(得分:2)
正如Tomas所说,创建一个基于异步的版本的DownloadString
(或者只是使用他的FSharp.Data库来处理它)会更“功能”。
您还可以将FSharp.Data与ExtCore结合使用,以利用ExtCore中的asyncMaybe
或asyncChoice
工作流程。这些工作流程在普通async
工作流程之上提供了非常易于使用的错误处理。
无论如何,我花了几分钟清理你的代码。它并不多,但它确实在几个方面简化了代码:
open System
open System.IO
open System.Xml
open System.Xml.Linq
open Printf
/// Outcome of attempting to download a single page.
type DownloadPageResult =
    { /// Address that was requested.
      Uri : Uri
      /// True when the download raised an exception.
      ErrorOccured : bool
      /// Text of the downloaded page; empty when the download failed.
      Source : string }
/// Summary of one parsed review page.
type ReviewData =
    { /// Address of the page the count was taken from.
      CurrentPageUri : Uri
      /// Address of the following page, or None when there is none.
      NextPageUri : Uri option
      /// Number of review entries counted on the page.
      NumberOfReviews : uint32 }
/// Builds URIs for the Windows Phone marketplace review service.
module ReviewUrl =
    /// Absolute root of the service.
    let baseUri =
        new Uri("http://cdn.marketplaceedgeservice.windowsphone.com/", UriKind.Absolute)

    /// Returns the URI of the review feed of application `appId` for the
    /// given country code and locale, resolved against `baseUri`.
    /// The id is rendered in the hyphenated "D" format.
    let getUri country locale (appId : System.Guid) =
        let relativePath =
            sprintf "/v8/ratings/product/%s/reviews?os=8.0.0.0&cc=%s&oc=&lang=%s&hw=520170499&dm=Test&chunksize=10" (appId.ToString "D") country locale
        new Uri(baseUri, relativePath)
/// Downloads the document at `uri` and returns its text.
/// The requested address is echoed to standard output first.
/// Failures never propagate: any exception is logged to standard error and
/// reported as `ErrorOccured = true` with an empty `Source`.
let downloadPage (uri : System.Uri) =
    try
        use webClient = new System.Net.WebClient()
        printfn "%s" (uri.ToString())
        webClient.Headers.Add("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8")
        // No Accept-Encoding header: WebClient does not decompress response
        // bodies, so advertising compressed encodings could make
        // DownloadString return undecodable text.
        webClient.Headers.Add("Accept-Language", "en-US,en;q=0.8,fr;q=0.6")
        webClient.Headers.Add("User-Agent", "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/28.0.1482.0 Safari/537.36")
        { Uri = uri; Source = webClient.DownloadString uri; ErrorOccured = false }
    with error ->
        // Keep the best-effort contract, but leave a trace of what went wrong.
        eprintfn "Download of %O failed: %s" uri error.Message
        { Uri = uri; Source = String.Empty; ErrorOccured = true }
/// Counts the Atom `entry` elements of a downloaded page and locates the
/// address of the next page, resolved against `ReviewUrl.baseUri`.
/// A failed download yields zero reviews and no next page.
/// `NextPageUri` is also None when the page has no `link` element with
/// `rel="next"`, or when that link carries no address.
let parse (pageResult : DownloadPageResult) =
    if pageResult.ErrorOccured then
        { CurrentPageUri = pageResult.Uri; NextPageUri = None; NumberOfReviews = 0u }
    else
        use reader = new StringReader (pageResult.Source)
        let doc = XDocument.Load reader
        let ns = XNamespace.Get "http://www.w3.org/2005/Atom"
        // XElement.Attribute returns null for a missing attribute, so guard
        // before reading `.Value`.
        let attributeValue name (element : XElement) =
            match element.Attribute (XName.Get name) with
            | null -> null
            | attribute -> attribute.Value
        let nextUrl =
            doc.Descendants (ns + "link")
            |> Seq.tryFind (fun link -> attributeValue "rel" link = "next")
            |> Option.map (fun link ->
                // The target of a link is its `href` attribute; fall back to
                // the element text only when the attribute is absent.
                match attributeValue "href" link with
                | null -> link.Value
                | href -> href)
        { CurrentPageUri = pageResult.Uri
          NextPageUri =
              nextUrl
              |> Option.bind (fun url ->
                  if System.String.IsNullOrEmpty url then None
                  else Some (Uri (ReviewUrl.baseUri, url)))
          NumberOfReviews =
              doc.Descendants (ns + "entry") |> Seq.length |> uint32 }
/// Downloads and parses the review page of the hard-coded application for
/// `locale`. The country code is whatever follows the first three
/// characters of `locale` (e.g. "US" for "en-US").
let downloadReviews (locale : string) =
    let appId = System.Guid "4e08377c-1240-4f80-9c35-0bacde2c66b6"
    let country = locale.Substring 3
    parse (downloadPage (ReviewUrl.getUri country locale appId))
/// Entry point: fetches the review summary for each locale in turn, prints
/// the resulting array and exits with code 0.
[<EntryPoint>]
let main argv =
    [| "en-US"; "en-GB" |]
    |> Array.map downloadReviews
    |> printfn "%A"
    0