解析HTML表--Swift 3

时间:2016-07-10 16:25:03

标签: html ios json swift httprequest

我已经获取了佛蒙特州彩票网站某个页面(this page)的全部HTML代码。该页面中有一个很大的表格:

lottery prize table

我使用此代码(在Playground中)成功获取该页面的HTML:

import UIKit
import Foundation
import PlaygroundSupport

// Downloads the raw HTML of the Vermont Lottery "outstanding prizes" page
// and prints it to the console.
let url = URL(string: "https://vtlottery.com/games/instant-tickets/outstanding-prizes")!
let session = URLSession.shared

// URLRequest is the Swift value type, so it can be configured directly and
// passed to URLSession without the NSMutableURLRequest -> URLRequest cast.
var request = URLRequest(url: url)
request.httpMethod = "POST"
request.cachePolicy = .reloadIgnoringCacheData

let paramString = "data=Hello"
request.httpBody = paramString.data(using: .utf8)

// `session` already holds the shared URLSession instance; it must not be
// called like a function.
let task = session.dataTask(with: request) { data, response, error in
    // Swift 3 dropped `where` from guard/if conditions; every clause is now
    // separated by a comma.
    guard let data = data, response != nil, error == nil else {
        print("error")
        return
    }

    // Decoding can fail when the body is not valid UTF-8, so report that
    // instead of force-unwrapping the result.
    guard let dataString = String(data: data, encoding: .utf8) else {
        print("error: response body is not valid UTF-8")
        return
    }
    print(dataString)
}

task.resume()

// Keep the playground alive until the asynchronous completion handler runs.
PlaygroundPage.current.needsIndefiniteExecution = true

如何从表中获取数据,以便将其转换为巨大的JSON哈希表(或类似表)?

基本上,我希望把该表格的HTML代码转换成类似下面的结构:

        "row 1": {
        "Price":"$20",
        "Game #":"1333",
        "Game Name":"Money",
        "Top Prizes":["$150000", "$5000", "$500"],
        "Unclaimed Top Prizes":["2", "16", "246"],
        "Total Unclaimed":"$2479510",
        "% Sold": "1",
        "# Of Tickets":"183600"
        },
        "row 2": {
        "Price":"$10",
        "Game #":"1339",
        "Game Name":"Diamonds & Pearls",
        "Top Prizes":["$50000", "$5000", "$500"],
        "Unclaimed Top Prizes":["3", "5", "174"],
        "Total Unclaimed":"$1264925",
        "% Sold": "4",
        "# Of Tickets":"201650"
        }, ... (etc.)

我如何开始解析这些数据?

1 个答案:

答案 0 :(得分:1)

您需要一个HTML解析器来遍历响应中的HTML。千万不要考虑使用正则表达式。下面的答案使用了HTMLReader,您可以通过CocoaPods将其添加到项目中。Playground在我这里总是崩溃,所以我把代码改写成了一个IBAction:

import UIKit
import MapKit
import HTMLReader

/// View controller that downloads the Vermont Lottery "outstanding prizes"
/// page and converts the rows of its `#tblData` table into dictionaries.
class ViewController: UIViewController {
    /// Parsed table rows keyed by "row 1", "row 2", ... in document order.
    /// Each row maps a column title to a trimmed string, except the two
    /// prize columns, which hold an array with one string per `<p>` element.
    var result = [String: [String: AnyObject]]()

    /// Number of `<td>` cells a row must have to be treated as a data row.
    private static let expectedColumnCount = 8

    override func viewDidLoad() {
        super.viewDidLoad()
    }

    override func didReceiveMemoryWarning() {
        super.didReceiveMemoryWarning()
        // Dispose of any resources that can be recreated.
    }

    /// Fetches the prize page, parses every table row and merges the parsed
    /// rows into `result`.
    ///
    /// - Parameter sender: The control that triggered the action (unused).
    @IBAction func loadHTML(_ sender: AnyObject) {
        let url = URL(string: "https://vtlottery.com/games/instant-tickets/outstanding-prizes")!
        let session = URLSession.shared

        var request = URLRequest(url: url)
        request.httpMethod = "POST"
        request.cachePolicy = .reloadIgnoringCacheData

        let paramString = "data=Hello"
        request.httpBody = paramString.data(using: .utf8)

        // `session` is already the shared instance, so it is used directly
        // rather than being called like a function.
        let task = session.dataTask(with: request) { data, response, error in
            // Swift 3 removed `where` from guard conditions; clauses are
            // comma-separated instead.
            guard let data = data, response != nil, error == nil else {
                print("error")
                return
            }

            let html = HTMLDocument(data: data, contentTypeHeader: "text/html; charset=utf-8")

            // Build the rows locally so the controller's state is only
            // touched on the main queue below.
            var rows = [String: [String: AnyObject]]()
            var index = 0
            for node in html.nodes(matchingSelector: "#tblData tbody tr") {
                let columns = node.nodes(matchingSelector: "td")
                // Rows without the full set of cells are skipped instead of
                // crashing on an out-of-range subscript.
                guard let rowData = ViewController.rowData(from: columns) else {
                    continue
                }

                index += 1
                rows["row \(index)"] = rowData
            }

            // The completion handler runs on a background queue; hop to the
            // main queue before mutating the view controller.
            DispatchQueue.main.async {
                for (key, row) in rows {
                    self.result[key] = row
                }

                // It's better to put a breakpoint here than to rely on the
                // console: Swift 3's logging is very verbose.
                if let unclaimed = self.result["row 1"]?["Unclaimed Top Prizes"] {
                    print(unclaimed)
                } else {
                    print("error: no table rows were parsed")
                }
            }
        }

        task.resume()
    }

    /// Converts the `<td>` cells of one table row into a dictionary.
    ///
    /// The column order was read from the page with a web inspector.
    ///
    /// - Parameter columns: The `<td>` elements of a single `<tr>`.
    /// - Returns: The row dictionary, or `nil` when the row has fewer than
    ///   `expectedColumnCount` cells.
    private static func rowData(from columns: [HTMLElement]) -> [String: AnyObject]? {
        guard columns.count >= expectedColumnCount else {
            return nil
        }

        // Many cells pad their text with newlines, tabs or spaces; this set
        // is used to trim them off.
        let spaceCharacterSet = CharacterSet(charactersIn: "\n\t ")

        // The dictionary's value type is AnyObject, so Swift strings and
        // arrays have to be bridged explicitly.
        func text(_ column: Int) -> AnyObject {
            return columns[column].textContent.trimmingCharacters(in: spaceCharacterSet) as AnyObject
        }
        func paragraphs(_ column: Int) -> AnyObject {
            return columns[column].nodes(matchingSelector: "p").map { $0.textContent } as AnyObject
        }

        return [
            "Price"                : text(0),
            "Game #"               : text(1),
            "Game Name"            : text(2),
            // Key corrected from "Top Prices" to match the column title and
            // the sibling "Unclaimed Top Prizes" key.
            "Top Prizes"           : paragraphs(3),
            "Unclaimed Top Prizes" : paragraphs(4),
            "Total Unclaimed"      : text(5),
            "% Sold"               : text(6),
            "# of tickets"         : text(7)
        ]
    }
}